// audiobook-creation-exchange 0.1.0
//
// ACX-compliant audio post-processing: normalisation, limiting, gating, LUFS measurement, and spectral analysis for AI-generated speech audio.
// Documentation
//! Click and impulse suppressor.
//!
//! TTS synthesis engines occasionally emit very short transients — glottal-stop
//! artifacts, codec glitches — that survive de-essing and limiting unchanged
//! because they are too brief to carry significant energy in a 50 ms STFT frame.
//!
//! Detection: scan in 5 ms non-overlapping analysis windows. Inside each window,
//! flag any sample whose magnitude exceeds `threshold_factor × local_rms`. If the
//! flagged run lasts no longer than `max_click_ms`, replace it with cubic Hermite
//! (zero-tangent, i.e. smoothstep) interpolation between the last clean sample
//! before and the first clean sample after the run.

/// Default multiplier applied to the local-window RMS: a sample whose magnitude
/// exceeds `factor × RMS` is treated as part of a click.
pub const DEFAULT_THRESHOLD_FACTOR: f32 = 6.0;
/// Default maximum click duration in milliseconds (converted to a sample count
/// using the sample rate). Flagged runs longer than this are considered
/// plosives rather than clicks and are left untouched.
pub const DEFAULT_MAX_CLICK_MS: usize = 8;

/// Length, in milliseconds, of each non-overlapping window used to compute the
/// local RMS.
const ANALYSIS_WINDOW_MS: usize = 5;

/// Run the click suppressor over `samples` in place with the module defaults.
///
/// Equivalent to calling [`suppress_clicks_with_params`] with
/// [`DEFAULT_THRESHOLD_FACTOR`] and [`DEFAULT_MAX_CLICK_MS`].
pub fn suppress_clicks(samples: &mut [i16], sample_rate: u32) {
    let (factor, max_ms) = (DEFAULT_THRESHOLD_FACTOR, DEFAULT_MAX_CLICK_MS);
    suppress_clicks_with_params(samples, sample_rate, factor, max_ms);
}

/// Suppress clicks with explicit `threshold_factor` and `max_click_ms`.
///
/// The buffer is split into non-overlapping analysis windows of
/// `ANALYSIS_WINDOW_MS` milliseconds and the RMS of each window is computed.
/// A sample is flagged when its magnitude exceeds `threshold_factor` times the
/// RMS of the window it falls in; the flagged run then extends for as long as
/// the following samples stay above that same limit. Runs lasting at most
/// `max_click_ms` milliseconds are overwritten in place with a smoothstep
/// curve (cubic Hermite with zero end tangents) from the sample just before
/// the run to the sample just after it. A neighbour that does not exist
/// because the run touches either end of the buffer is taken to be 0. Longer
/// runs are left untouched.
///
/// # Parameters
///
/// * `samples` – signed 16-bit samples, modified in place.
/// * `sample_rate` – samples per second; used to convert the millisecond
///   durations into sample counts (each at least one sample).
/// * `threshold_factor` – multiplier applied to the window RMS.
/// * `max_click_ms` – longest run, in milliseconds, that is still repaired.
///
/// # No-op cases
///
/// Nothing is modified when `samples` has fewer than four elements, when
/// `sample_rate` is 0, or when `threshold_factor` is NaN or not strictly
/// positive. Windows whose RMS is below 1.0 are never scanned for clicks.
pub fn suppress_clicks_with_params(
    samples: &mut [i16],
    sample_rate: u32,
    threshold_factor: f32,
    max_click_ms: usize,
) {
    // A non-positive factor would flag every non-zero sample (and could blank
    // a short buffer entirely); a NaN factor can never flag anything. Treat
    // both as "nothing to do" explicitly.
    if samples.len() < 4
        || sample_rate == 0
        || threshold_factor.is_nan()
        || threshold_factor <= 0.0
    {
        return;
    }

    // Millisecond → sample-count conversion, never less than one sample.
    // Saturating so that an extreme `max_click_ms` cannot overflow `usize`.
    let ms_to_samples = |ms: usize| ((sample_rate as usize).saturating_mul(ms) / 1000).max(1);
    let window_size = ms_to_samples(ANALYSIS_WINDOW_MS);
    let max_click_samples = ms_to_samples(max_click_ms);

    // One RMS value per non-overlapping analysis window (the last window may
    // be shorter). Sample `i` belongs to window `i / window_size`.
    let window_rms: Vec<f32> = samples
        .chunks(window_size)
        .map(|window| {
            let sum_sq: f64 = window.iter().map(|&s| f64::from(s).powi(2)).sum();
            (sum_sq / window.len() as f64).sqrt() as f32
        })
        .collect();

    // Detect and repair click runs.
    let n = samples.len();
    let mut i = 0;
    while i < n {
        let rms = window_rms[i / window_size];
        if rms < 1.0 {
            // Near-silent window: the RMS is too small to be a meaningful
            // reference level, so nothing in it is treated as a click.
            i += 1;
            continue;
        }

        let limit = rms * threshold_factor;
        if f32::from(samples[i]).abs() <= limit {
            i += 1;
            continue;
        }

        // Extend the run while samples stay above the limit that was fixed at
        // the run's first sample (the run may cross window boundaries).
        let run_start = i;
        let run_end = samples[run_start + 1..]
            .iter()
            .position(|&s| f32::from(s).abs() <= limit)
            .map_or(n, |offset| run_start + 1 + offset);
        let run_len = run_end - run_start;

        // Runs longer than the click limit are left for the plosive suppressor.
        if run_len <= max_click_samples {
            // Anchor values: the samples bordering the run, or 0 at a buffer edge.
            let v_before = if run_start > 0 {
                f32::from(samples[run_start - 1])
            } else {
                0.0
            };
            let v_after = samples.get(run_end).map_or(0.0, |&s| f32::from(s));

            let span = (run_len + 1) as f32;
            for (k, slot) in samples[run_start..run_end].iter_mut().enumerate() {
                // `t` runs over the open interval (0, 1) across the run.
                let t = (k + 1) as f32 / span;
                // Cubic Hermite with zero tangents — smooth step.
                let t2 = t * t;
                let t3 = t2 * t;
                let h = 3.0 * t2 - 2.0 * t3;
                let v = v_before + (v_after - v_before) * h;
                *slot = v.round().clamp(f32::from(i16::MIN), f32::from(i16::MAX)) as i16;
            }
        }

        // Resume scanning after the run whether or not it was repaired.
        i = run_end;
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Sample rate used by every test.
    const SR: u32 = 24_000;

    /// An empty buffer must be accepted without panicking.
    #[test]
    fn empty_input_is_a_no_op() {
        let mut samples: Vec<i16> = Vec::new();
        suppress_clicks(&mut samples, SR); // must not panic
    }

    /// A single loud sample in an otherwise flat signal is pulled back down.
    #[test]
    fn single_spike_is_removed() {
        // Flat signal at 1000 with one 20 000 spike.
        let mut samples = vec![1000i16; 500];
        samples[250] = 20_000;
        suppress_clicks(&mut samples, SR);
        assert!(
            samples[250].abs() < 5_000,
            "Spike not suppressed: samples[250] = {}",
            samples[250]
        );
    }

    /// A smooth sine contains nothing click-like and must pass through intact.
    #[test]
    fn clean_signal_is_preserved() {
        // Smooth sine — nothing should be flagged.
        let n = SR as usize / 10;
        let original: Vec<i16> = (0..n)
            .map(|i| {
                let v = 5_000.0 * (2.0 * std::f32::consts::PI * 440.0 * i as f32 / SR as f32).sin();
                v.round() as i16
            })
            .collect();
        let mut processed = original.clone();
        suppress_clicks(&mut processed, SR);
        // Sum of squared differences should be tiny. Widen to f64 before
        // subtracting so the difference cannot overflow i16.
        let diff: f64 = original
            .iter()
            .zip(processed.iter())
            .map(|(&a, &b)| (f64::from(a) - f64::from(b)).powi(2))
            .sum();
        assert!(
            diff < 1e6,
            "Clean signal modified by click suppressor (total sq diff = {:.0})",
            diff
        );
    }

    /// A flagged run that exceeds `max_click_ms` must be left untouched.
    #[test]
    fn run_longer_than_max_is_left_alone() {
        // 20 ms run of loud samples — too long to be a click; must not be altered.
        let window = SR as usize * ANALYSIS_WINDOW_MS / 1000;
        let click_len = SR as usize * 20 / 1000;
        // The window RMS includes the loud samples themselves, so a run that
        // occupies much of its first window raises the limit above its own
        // level and is never flagged. Starting two samples before a window
        // boundary keeps that window's RMS low enough for the run to be
        // detected, which is what makes the length check actually execute.
        let run_start = window - 2;
        let mut samples = vec![1000i16; 1000];
        for s in samples[run_start..run_start + click_len].iter_mut() {
            *s = 20_000;
        }
        let original = samples.clone();
        suppress_clicks_with_params(&mut samples, SR, 6.0, 5); // max 5 ms
        assert_eq!(
            samples, original,
            "Long run should not be suppressed as a click"
        );
    }
}