audiobook-creation-exchange 0.1.0

ACX-compliant audio post-processing: normalisation, limiting, gating, LUFS measurement, and spectral analysis for AI-generated speech audio.
Documentation
//! Three-band feed-forward RMS compressor.
//!
//! The signal is split into low / mid / high bands via Linkwitz-Riley 4th-order
//! crossovers (two cascaded 2-pole Butterworth lowpass + highpass pairs).
//! Each band is compressed independently with its own threshold and ratio, then
//! the three bands are summed. The result is more musical than a brickwall
//! limiter: quiet consonants come up, boomy low-mid is controlled, and harsh
//! high-frequency transients are tamed without audible pumping.
//!
//! Default crossover frequencies: 250 Hz (low/mid) and 3 000 Hz (mid/high).

/// Parameters for a single compressor band.
///
/// One value of this type configures the envelope smoothing, gain computer and
/// make-up gain that the compressor applies to one frequency band.
#[derive(Debug, Clone, Copy)]
pub struct BandParams {
    /// RMS threshold in dBFS above which compression begins.
    ///
    /// 0 dBFS corresponds to `i16::MAX`. Gain reduction is applied only while
    /// the smoothed RMS envelope of the band exceeds this level.
    pub threshold_db: f32,
    /// Compression ratio (e.g. 3.0 = 3:1).
    ///
    /// The envelope's excess over the threshold, in dB, is reduced by the
    /// factor `1 - 1/ratio`. The value is not validated.
    pub ratio: f32,
    /// Attack time constant in seconds.
    ///
    /// Governs envelope smoothing while the measured RMS is above the current
    /// envelope (level rising).
    pub attack_s: f32,
    /// Release time constant in seconds.
    ///
    /// Governs envelope smoothing while the measured RMS is at or below the
    /// current envelope (level falling).
    pub release_s: f32,
    /// Make-up gain in dB applied after compression.
    ///
    /// Applied to every sample of the band, whether or not the compressor is
    /// currently reducing gain.
    pub makeup_db: f32,
}

/// Full multiband compressor configuration.
///
/// Holds one [`BandParams`] per band plus the two crossover frequencies that
/// separate the bands. The crossover frequencies are not validated.
#[derive(Debug, Clone, Copy)]
pub struct MultibandParams {
    /// Compressor settings for the band below `low_mid_hz`.
    pub low: BandParams,
    /// Compressor settings for the band between `low_mid_hz` and `mid_high_hz`.
    pub mid: BandParams,
    /// Compressor settings for the band above `mid_high_hz`.
    pub high: BandParams,
    /// Crossover between low and mid band (Hz).
    pub low_mid_hz: f32,
    /// Crossover between mid and high band (Hz).
    pub mid_high_hz: f32,
}

impl Default for MultibandParams {
    /// Built-in preset: crossovers at 250 Hz and 3 kHz, with a gentle 2:1 low
    /// band, a 3:1 mid band carrying 0.5 dB of make-up gain, and a fast 4:1
    /// high band.
    fn default() -> Self {
        // Slowest time constants of the three bands, lowest ratio.
        let low = BandParams {
            threshold_db: -28.0,
            ratio: 2.0,
            attack_s: 0.05,
            release_s: 0.20,
            makeup_db: 0.0,
        };
        // The only band with non-zero make-up gain.
        let mid = BandParams {
            threshold_db: -22.0,
            ratio: 3.0,
            attack_s: 0.01,
            release_s: 0.08,
            makeup_db: 0.5,
        };
        // Fastest time constants, highest ratio.
        let high = BandParams {
            threshold_db: -26.0,
            ratio: 4.0,
            attack_s: 0.005,
            release_s: 0.06,
            makeup_db: 0.0,
        };

        Self {
            low,
            mid,
            high,
            low_mid_hz: 250.0,
            mid_high_hz: 3_000.0,
        }
    }
}

/// Compress `samples` in place using the built-in default preset.
///
/// Convenience wrapper: builds [`MultibandParams::default`] and forwards to
/// [`compress_with_params`].
pub fn compress(samples: &mut [i16], sample_rate: u32) {
    let defaults = MultibandParams::default();
    compress_with_params(samples, sample_rate, &defaults);
}

/// Compress `samples` in place with caller-supplied settings.
///
/// The signal is converted to `f32`, split into low / mid / high bands at
/// `params.low_mid_hz` and `params.mid_high_hz`, each band is compressed with
/// its own [`BandParams`], and the three results are summed, rounded and
/// clamped back into the `i16` range.
///
/// Returns without touching `samples` when the slice is empty or
/// `sample_rate` is zero. The crossover frequencies are used as given; no
/// validation is performed on them.
pub fn compress_with_params(samples: &mut [i16], sample_rate: u32, params: &MultibandParams) {
    if sample_rate == 0 || samples.is_empty() {
        return;
    }
    let rate = sample_rate as f32;

    // Work in floating point so the filters and gain stages do not quantise.
    let input: Vec<f32> = samples.iter().map(|&s| s as f32).collect();

    // First split at the low/mid crossover; the upper part is then split again
    // at the mid/high crossover.
    let low_band = lowpass_lr4(&input, params.low_mid_hz, rate);
    let upper = highpass_lr4(&input, params.low_mid_hz, rate);
    let mid_band = lowpass_lr4(&upper, params.mid_high_hz, rate);
    let high_band = highpass_lr4(&upper, params.mid_high_hz, rate);

    // Each band gets its own compressor settings.
    let low_out = apply_compression(&low_band, sample_rate, &params.low);
    let mid_out = apply_compression(&mid_band, sample_rate, &params.mid);
    let high_out = apply_compression(&high_band, sample_rate, &params.high);

    // Recombine and write back, saturating at the i16 limits.
    let floor = i16::MIN as f32;
    let ceiling = i16::MAX as f32;
    let bands = low_out.iter().zip(&mid_out).zip(&high_out);
    for (dst, ((lo, mid), hi)) in samples.iter_mut().zip(bands) {
        let mixed = lo + mid + hi;
        *dst = mixed.round().clamp(floor, ceiling) as i16;
    }
}

// ── Feed-forward RMS compressor ───────────────────────────────────────────────

/// Feed-forward RMS compressor for a single band.
///
/// For every sample the RMS level of a centred window of roughly 10 ms is
/// measured, smoothed with separate attack / release one-pole coefficients,
/// and converted into a gain: above the threshold the excess (in dB) is
/// reduced by `1 - 1/ratio`, otherwise the gain is unity. Make-up gain is
/// applied to every output sample.
///
/// * `band` – samples of one frequency band, scaled so that full scale is
///   `i16::MAX`.
/// * `sample_rate` – sample rate in Hz; sets the window length and the
///   smoothing coefficients.
/// * `p` – threshold, ratio, time constants and make-up gain for this band.
///
/// Returns a new vector of the same length as `band`.
///
/// The windowed mean square is maintained as a running sum (one sample enters
/// and one leaves per step), so the cost is O(n) instead of O(n · window).
/// The sum is accumulated in `f64`, so results can differ from a direct `f32`
/// summation of each window by float rounding only.
fn apply_compression(band: &[f32], sample_rate: u32, p: &BandParams) -> Vec<f32> {
    let n = band.len();
    let sr = sample_rate as f32;
    let threshold_linear = 10f32.powf(p.threshold_db / 20.0) * i16::MAX as f32;
    let makeup = 10f32.powf(p.makeup_db / 20.0);
    let attack_coeff = (-1.0 / (p.attack_s * sr)).exp();
    let release_coeff = (-1.0 / (p.release_s * sr)).exp();
    // Fraction of the excess (in dB) that is removed above the threshold.
    let slope = 1.0 - 1.0 / p.ratio;

    // RMS envelope follower window: 10 ms, centred on the current sample, so
    // it spans `half` samples on each side (clipped at the slice edges).
    let half = ((sr * 0.010) as usize).max(1) / 2;

    let square = |s: f32| f64::from(s) * f64::from(s);

    // Prime the running sum with everything in the first window except its
    // newest sample; the loop adds that one on its first iteration.
    let mut window_sum: f64 = band[..half.min(n)].iter().map(|&s| square(s)).sum();
    let mut envelope = 0f32;
    let mut out = Vec::with_capacity(n);

    for (i, &sample) in band.iter().enumerate() {
        // Slide the window: sample i + half enters, sample i - half - 1 leaves.
        if let Some(&entering) = band.get(i + half) {
            window_sum += square(entering);
        }
        if i > half {
            window_sum -= square(band[i - half - 1]);
        }
        let w_start = i.saturating_sub(half);
        let w_end = (i + half + 1).min(n);

        // Subtraction can leave a tiny negative residue after loud passages;
        // clamp so the square root stays real.
        let mean_sq = (window_sum / (w_end - w_start) as f64).max(0.0);
        let rms = mean_sq.sqrt() as f32;

        // Smooth the envelope: attack while rising, release while falling.
        let coeff = if rms > envelope {
            attack_coeff
        } else {
            release_coeff
        };
        envelope = envelope * coeff + rms * (1.0 - coeff);

        // Gain computation.
        let gain = if envelope > threshold_linear {
            let excess_db = 20.0 * (envelope / threshold_linear).log10();
            let gain_db = -(excess_db * slope);
            10f32.powf(gain_db / 20.0)
        } else {
            1.0
        };

        out.push(sample * gain * makeup);
    }
    out
}

// ── Linkwitz-Riley 4th-order crossover filters ────────────────────────────────
// LR4 = two cascaded Butterworth 2nd-order sections with the same cutoff.

/// 4th-order Linkwitz-Riley lowpass at `freq_hz` for a signal sampled at `sr` Hz.
///
/// Runs the same 2nd-order Butterworth lowpass section over the signal twice
/// and returns the filtered copy; the input is left untouched.
fn lowpass_lr4(signal: &[f32], freq_hz: f32, sr: f32) -> Vec<f32> {
    let (b0, b1, b2, a1, a2) = butterworth2_lp(freq_hz, sr);
    let section = |input: &[f32]| biquad_filter(input, b0, b1, b2, a1, a2);
    section(&section(signal))
}

/// 4th-order Linkwitz-Riley highpass at `freq_hz` for a signal sampled at `sr` Hz.
///
/// Runs the same 2nd-order Butterworth highpass section over the signal twice
/// and returns the filtered copy; the input is left untouched.
fn highpass_lr4(signal: &[f32], freq_hz: f32, sr: f32) -> Vec<f32> {
    let (b0, b1, b2, a1, a2) = butterworth2_hp(freq_hz, sr);
    let section = |input: &[f32]| biquad_filter(input, b0, b1, b2, a1, a2);
    section(&section(signal))
}

/// Coefficients of a 2nd-order Butterworth lowpass biquad.
///
/// `freq_hz` is the cutoff and `sr` the sample rate, both in Hz. Returns
/// `(b0, b1, b2, a1, a2)`, already divided by `a0`, in the order expected by
/// `biquad_filter`.
fn butterworth2_lp(freq_hz: f32, sr: f32) -> (f32, f32, f32, f32, f32) {
    use std::f32::consts::{PI, SQRT_2};

    // Normalised angular cutoff frequency in radians per sample.
    let omega = 2.0 * PI * freq_hz / sr;
    let (sin_w, cos_w) = omega.sin_cos();
    // Q = 1/√2 for Butterworth, so alpha = sin(w0) / (2Q) = sin(w0) / √2.
    let alpha = sin_w / SQRT_2;
    let norm = 1.0 + alpha;

    // The lowpass numerator is (1 - cos w0) · [1/2, 1, 1/2].
    let numer = 1.0 - cos_w;
    let outer = numer / 2.0 / norm;
    let centre = numer / norm;

    let fb1 = -2.0 * cos_w / norm;
    let fb2 = (1.0 - alpha) / norm;
    (outer, centre, outer, fb1, fb2)
}

/// Coefficients of a 2nd-order Butterworth highpass biquad.
///
/// `freq_hz` is the cutoff and `sr` the sample rate, both in Hz. Returns
/// `(b0, b1, b2, a1, a2)`, already divided by `a0`, in the order expected by
/// `biquad_filter`.
fn butterworth2_hp(freq_hz: f32, sr: f32) -> (f32, f32, f32, f32, f32) {
    use std::f32::consts::{PI, SQRT_2};

    // Normalised angular cutoff frequency in radians per sample.
    let omega = 2.0 * PI * freq_hz / sr;
    let (sin_w, cos_w) = omega.sin_cos();
    // Same Q = 1/√2 as the matching lowpass section.
    let alpha = sin_w / SQRT_2;
    let norm = 1.0 + alpha;

    // The highpass numerator is (1 + cos w0) · [1/2, -1, 1/2].
    let numer = 1.0 + cos_w;
    let outer = numer / 2.0 / norm;
    let centre = -numer / norm;

    let fb1 = -2.0 * cos_w / norm;
    let fb2 = (1.0 - alpha) / norm;
    (outer, centre, outer, fb1, fb2)
}

/// Run one biquad section over `signal` and return the filtered copy.
///
/// Implements the difference equation
/// `y[n] = b0·x[n] + b1·x[n-1] + b2·x[n-2] − a1·y[n-1] − a2·y[n-2]`
/// with all delay elements starting at zero. The coefficients are expected to
/// be normalised already (`a0 == 1`).
fn biquad_filter(signal: &[f32], b0: f32, b1: f32, b2: f32, a1: f32, a2: f32) -> Vec<f32> {
    // Delay lines: index 0 holds the previous sample, index 1 the one before.
    let mut x_hist = [0f32; 2];
    let mut y_hist = [0f32; 2];

    signal
        .iter()
        .map(|&x| {
            let y = b0 * x + b1 * x_hist[0] + b2 * x_hist[1] - a1 * y_hist[0] - a2 * y_hist[1];
            x_hist = [x, x_hist[0]];
            y_hist = [y, y_hist[0]];
            y
        })
        .collect()
}

// Unit tests for the multiband compressor entry point and its crossover filters.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::analyse::rms_db;

    // Sample rate shared by every test signal.
    const SR: u32 = 24_000;

    // Build `secs` seconds of a sine wave at `freq_hz` with the given peak
    // amplitude (in i16 sample units), clamped to the i16 range.
    fn pure_tone(freq_hz: f32, amplitude: f32, secs: f32) -> Vec<i16> {
        let n = (SR as f32 * secs) as usize;
        (0..n)
            .map(|i| {
                let v =
                    amplitude * (2.0 * std::f32::consts::PI * freq_hz * i as f32 / SR as f32).sin();
                v.clamp(i16::MIN as f32, i16::MAX as f32) as i16
            })
            .collect()
    }

    // An empty buffer must be accepted without panicking.
    #[test]
    fn empty_input_is_a_no_op() {
        let mut samples: Vec<i16> = Vec::new();
        compress(&mut samples, SR);
    }

    #[test]
    fn loud_mid_band_is_reduced() {
        // Loud 1 kHz tone — sits in the mid band above threshold.
        let original = pure_tone(1_000.0, 20_000.0, 0.5);
        let mut compressed = original.clone();
        compress(&mut compressed, SR);

        // Overall level after compression must be lower than before.
        let before = rms_db(&original);
        let after = rms_db(&compressed);
        assert!(
            after < before,
            "Loud mid-band tone not compressed: before={:.1} after={:.1}",
            before,
            after
        );
    }

    #[test]
    fn quiet_signal_passes_through_unchanged() {
        // Well below every threshold — should pass through with < 2 dB change
        // (the default mid band alone adds 0.5 dB of make-up gain).
        let original = pure_tone(1_000.0, 500.0, 0.5);
        let mut processed = original.clone();
        compress(&mut processed, SR);

        let before = rms_db(&original);
        let after = rms_db(&processed);
        assert!(
            (before - after).abs() < 2.0,
            "Quiet signal altered by {:.2} dB",
            before - after
        );
    }

    #[test]
    fn lowpass_attenuates_high_frequency() {
        // 8 kHz tone fed through a 250 Hz LP4 — should be suppressed by > 40 dB.
        let signal: Vec<f32> = pure_tone(8_000.0, 5_000.0, 0.1)
            .iter()
            .map(|&s| s as f32)
            .collect();
        let low = lowpass_lr4(&signal, 250.0, SR as f32);
        let skip = (SR as usize * 20) / 1000; // skip 20 ms transient
        // RMS of input and output after the transient; both are floored so the
        // ratio and its logarithm below stay finite.
        let rms_in: f32 = {
            let sq: f32 = signal[skip..].iter().map(|&s| s * s).sum();
            (sq / signal[skip..].len() as f32).sqrt().max(1.0)
        };
        let rms_out: f32 = {
            let sq: f32 = low[skip..].iter().map(|&s| s * s).sum();
            (sq / low[skip..].len() as f32).sqrt().max(1e-9)
        };
        let attenuation_db = 20.0 * (rms_in / rms_out).log10();
        assert!(
            attenuation_db > 40.0,
            "LP4 at 250 Hz attenuated 8 kHz by only {:.1} dB (expected > 40 dB)",
            attenuation_db
        );
    }

    #[test]
    fn highpass_passes_high_frequency() {
        // 4 kHz tone fed through a 250 Hz HP4 — should pass within 3 dB.
        let signal: Vec<f32> = pure_tone(4_000.0, 5_000.0, 0.1)
            .iter()
            .map(|&s| s as f32)
            .collect();
        let high = highpass_lr4(&signal, 250.0, SR as f32);
        let skip = (SR as usize * 20) / 1000; // skip 20 ms transient
        // Same floored RMS measurement as in the lowpass test.
        let rms_in: f32 = {
            let sq: f32 = signal[skip..].iter().map(|&s| s * s).sum();
            (sq / signal[skip..].len() as f32).sqrt().max(1.0)
        };
        let rms_out: f32 = {
            let sq: f32 = high[skip..].iter().map(|&s| s * s).sum();
            (sq / high[skip..].len() as f32).sqrt().max(1e-9)
        };
        let loss_db = 20.0 * (rms_in / rms_out).log10();
        assert!(
            loss_db < 3.0,
            "HP4 at 250 Hz attenuated 4 kHz by {:.1} dB (expected < 3 dB)",
            loss_db
        );
    }
}