audiobook_creation_exchange/
lib.rs

1//! ACX-compliant audio post-processing pipeline for speech audio.
2//!
3//! Validates and repairs raw L16 mono PCM against the ACX audiobook specification
4//! (−23…−18 dBFS RMS, −3 dBFS true-peak, −60 dBFS noise floor) per
5//! <https://www.acx.com/help/acx-producer-standards/201456300>.
6//!
7//! # Pipeline (applied in order by [`process`])
8//!
9//! 1. **Click suppression** — removes sub-10 ms transient spikes via cubic Hermite interpolation.
10//! 2. **DC offset removal** — subtract the mean sample value.
11//! 3. **Noise reduction** — Wiener spectral subtraction profiled from the leading silence
12//!    (disabled by default; requires a silent lead-in — see [`AcxConfig::denoise_enabled`]).
13//! 4. **EQ warmth** — low-shelf (+2 dB @ 180 Hz) + high-shelf (+1.5 dB @ 5 kHz) biquad IIR.
14//! 5. **De-essing** — OLA STFT; reduces 5–8 kHz sibilance band by up to −6 dB.
15//! 6. **Plosive suppression** — OLA STFT; attenuates sub-150 Hz bins on plosive windows.
16//! 7. **Multiband compression** — 3-band LR4 crossover compressor (250 Hz / 3 kHz crossovers);
17//!    runs before normalisation so the level step sees the compressed spectrum.
18//! 8. **Normalise** — linear gain to −20.5 dBFS, pre-compensated for bookend energy dilution.
19//! 9. **Limit** — 5 ms lookahead brickwall limiter; no peak (including 4× interpolated) exceeds −3 dBFS.
20//! 10. **Breath removal** — breath-band windows replaced with room tone (disabled by default).
21//! 11. **Pause normalisation** — caps over-long inter-sentence pauses to natural targets
22//!     (sentence 120 ms, paragraph 400 ms, scene break 700 ms); skips leading/trailing silence.
23//! 12. **Gate** — sub-threshold 50 ms windows replaced with 1/f pink noise room tone.
24//! 13. **Bookend padding** — first 1 s and last 3 s forced to room tone with 10 ms crossfades.
25//! 14. **Verify** — second analysis pass; returns `Err(AcxError::StillNonCompliant)` only when audio is irrecoverably broken.
26//!
27//! # Standalone utilities
28//!
29//! - [`crossfade()`] — equal-power segment transition
30//! - [`consistency_check`] — batch RMS variance check across multiple segments
31//! - [`LoudnessPreset`] — pre-built [`AcxConfig`] variants for different delivery loudness levels
32
33pub mod analyse;
34pub mod bitstream;
35pub mod breath;
36pub mod click;
37pub mod consistency;
38pub mod crossfade;
39pub mod dc_offset;
40pub mod deess;
41pub mod denoise;
42pub mod eq;
43pub mod error;
44pub mod gate;
45pub mod limiter;
46pub mod loudness_preset;
47pub mod lufs;
48pub mod multiband;
49pub mod normalise;
50pub mod pause_norm;
51pub mod plosive;
52pub mod room_tone;
53pub mod spectral;
54pub mod temporal;
55
56pub use analyse::{AcxReport, analyse};
57pub use bitstream::{CbrReport, Id3Report, check_cbr, check_id3_tags};
58pub use consistency::{ConsistencyReport, consistency_check};
59pub use crossfade::crossfade;
60pub use error::AcxError;
61pub use loudness_preset::LoudnessPreset;
62pub use lufs::{LufsReport, integrated_lufs, loudness_range};
63pub use multiband::MultibandParams;
64pub use spectral::{SpectralViolation, SpectralViolationKind, scan as spectral_scan};
65pub use temporal::{
66    DeadAirViolation, check_bookends, count_digital_zero_runs, detect_dead_air, max_dead_air,
67};
68
/// Audio processing configuration.
///
/// Defaults match the published ACX audiobook specification (−20.5 dBFS target,
/// −23…−18 dBFS RMS window, −3 dBFS peak, −60 dBFS noise floor). Construct with
/// [`AcxConfig::default`] and override individual fields as needed, or use
/// [`LoudnessPreset::config`] to get a pre-built variant for a specific loudness level.
///
/// The same configuration is consumed by both the read-only diagnostics
/// ([`validate_with_config`]) and the repair pipeline ([`process_with_config`]).
#[derive(Debug, Clone)]
pub struct AcxConfig {
    /// Target RMS to normalise to (dBFS). This is the level the *final* output
    /// should land on; the pipeline pre-compensates for bookend padding.
    pub rms_target_db: f32,
    /// Lower bound of the acceptable RMS range (dBFS).
    pub rms_min_db: f32,
    /// Upper bound of the acceptable RMS range (dBFS).
    pub rms_max_db: f32,
    /// True-peak ceiling — no sample (incl. inter-sample peaks) may exceed this.
    /// Passed to the brickwall limiter by the pipeline.
    pub peak_ceiling_db: f32,
    /// Noise floor must not exceed this value.
    pub noise_floor_max_db: f32,
    /// Windows below this RMS are replaced with room tone by the gate.
    /// Also used as the silence threshold for dead-air detection during validation.
    pub silence_threshold_db: f32,
    /// RMS level for synthesised room tone — must be below `noise_floor_max_db`.
    /// Used for the gate/bookend fill and for breath replacement.
    pub room_tone_db: f32,
    /// Maximum contiguous silence allowed inside a chapter.
    // NOTE(review): neither `validate_with_config` nor `process_with_config`
    // reads this field directly in this file — confirm it is consumed elsewhere.
    pub dead_air_limit: time::Duration,
    /// Sibilance band-energy ratio above which a window is flagged.
    // NOTE(review): `validate_with_config` calls `spectral::scan` without this
    // value; presumably consumed via `analyse` or another module — confirm.
    pub sibilance_ratio_threshold: f32,
    /// Plosive band-energy ratio above which a window is flagged.
    // NOTE(review): same caveat as `sibilance_ratio_threshold` — confirm consumer.
    pub plosive_ratio_threshold: f32,

    // ── pipeline feature flags ─────────────────────────────────────────────
    /// Remove sub-10 ms click artifacts before spectral processing.
    pub click_suppression_enabled: bool,
    /// Apply Wiener spectral subtraction noise reduction.
    ///
    /// Disabled by default. The profiling window is the first `denoise_profile_ms`
    /// milliseconds of the signal; if that region contains speech instead of room
    /// tone the speech itself will be treated as noise and attenuated.
    pub denoise_enabled: bool,
    /// Length of the noise-profile segment at the head of the signal (ms).
    pub denoise_profile_ms: u32,
    /// Oversubtraction factor for noise reduction (>1 compensates for estimation error).
    pub denoise_oversubtraction: f32,
    /// Spectral floor for noise reduction — bins are never reduced below this gain.
    pub denoise_spectral_floor: f32,
    /// Apply warmth shelving EQ (low-shelf body + high-shelf presence) in the pipeline.
    /// Only the shelf gains are configurable here; the pipeline uses the `eq`
    /// module's default corner frequencies.
    pub eq_enabled: bool,
    /// Low-shelf gain in dB (positive = boost).
    pub eq_low_shelf_db: f32,
    /// High-shelf gain in dB (positive = boost).
    pub eq_high_shelf_db: f32,
    /// Apply frequency-selective de-essing (5–8 kHz gain reduction) in the pipeline.
    pub deess_enabled: bool,
    /// Sibilance energy ratio above which de-essing begins (0.0–1.0).
    pub deess_threshold_ratio: f32,
    /// Maximum dB reduction applied by the de-esser (positive value = attenuation).
    pub deess_max_reduction_db: f32,
    /// Apply plosive suppression (sub-150 Hz shelving on plosive windows) in the pipeline.
    pub plosive_suppression_enabled: bool,
    /// Attenuation applied to the plosive band when suppression fires (dB, positive = cut).
    pub plosive_attenuation_db: f32,
    /// Apply 3-band multiband compression. The pipeline runs it *before*
    /// normalisation so the level step measures the compressed signal.
    pub multiband_enabled: bool,
    /// Remove breath sounds (replace with room tone) in the pipeline.
    /// Disabled by default — preserving breaths keeps narration feeling natural.
    pub breath_removal_enabled: bool,
    /// Cap over-long pauses to natural sentence/paragraph/scene-break targets.
    pub pause_norm_enabled: bool,
    /// Target length for sentence-level pauses (< 200 ms) after capping, in ms.
    pub pause_sentence_target_ms: u32,
    /// Target length for paragraph-level pauses (200–800 ms) after capping, in ms.
    pub pause_paragraph_target_ms: u32,
    /// Target length for scene-break pauses (> 800 ms) after capping, in ms.
    pub pause_scene_target_ms: u32,
}
143
/// Default configuration: ACX-spec level limits plus the per-module default
/// tuning constants. Denoise and breath removal are off; every other pipeline
/// stage is enabled.
impl Default for AcxConfig {
    /// Build the ACX-spec default configuration.
    fn default() -> Self {
        Self {
            // ── level targets / compliance limits ──
            rms_target_db: -20.5,
            rms_min_db: -23.0,
            rms_max_db: -18.0,
            peak_ceiling_db: -3.0,
            noise_floor_max_db: -60.0,
            // Gate threshold sits below the room-tone level so synthesised
            // room tone (−62 dB) is not itself gated as silence.
            silence_threshold_db: -65.0,
            room_tone_db: -62.0, // 2 dB below limit — inaudible but non-zero
            // ── analysis thresholds (defaults owned by their modules) ──
            dead_air_limit: temporal::DEAD_AIR_LIMIT,
            sibilance_ratio_threshold: spectral::SIBILANCE_RATIO_THRESHOLD,
            plosive_ratio_threshold: spectral::PLOSIVE_RATIO_THRESHOLD,
            // ── pipeline stages and their tuning ──
            click_suppression_enabled: true,
            // Off by default: requires a speech-free lead-in to profile noise.
            denoise_enabled: false,
            denoise_profile_ms: denoise::DEFAULT_PROFILE_MS,
            denoise_oversubtraction: denoise::DEFAULT_OVERSUBTRACTION,
            denoise_spectral_floor: denoise::DEFAULT_SPECTRAL_FLOOR,
            eq_enabled: true,
            eq_low_shelf_db: eq::DEFAULT_LOW_SHELF_DB,
            eq_high_shelf_db: eq::DEFAULT_HIGH_SHELF_DB,
            deess_enabled: true,
            deess_threshold_ratio: deess::DEFAULT_THRESHOLD_RATIO,
            deess_max_reduction_db: deess::DEFAULT_MAX_REDUCTION_DB,
            plosive_suppression_enabled: true,
            plosive_attenuation_db: plosive::DEFAULT_ATTENUATION_DB,
            multiband_enabled: true,
            // Off by default: breaths are kept so narration sounds natural.
            breath_removal_enabled: false,
            pause_norm_enabled: true,
            pause_sentence_target_ms: pause_norm::DEFAULT_SENTENCE_TARGET_MS,
            pause_paragraph_target_ms: pause_norm::DEFAULT_PARAGRAPH_TARGET_MS,
            pause_scene_target_ms: pause_norm::DEFAULT_SCENE_TARGET_MS,
        }
    }
}
179
/// Full diagnostic report produced by [`validate`].
///
/// Collects the ACX core metrics together with DC-offset, spectral, temporal
/// and LUFS/LRA measurements for one PCM buffer.
/// This is read-only — none of the fields imply automatic repair.
#[derive(Debug, Clone)]
pub struct DiagnosticReport {
    // --- ACX core ---
    /// Overall RMS in dBFS.
    pub rms_db: f32,
    /// True-peak in dBFS (4× oversampled).
    pub peak_db: f32,
    /// Noise floor in dBFS (quietest 10 % of 50 ms windows).
    pub noise_floor_db: f32,
    /// Whether all ACX core metrics are in spec.
    pub acx_compliant: bool,

    // --- DC offset ---
    /// DC offset as a fraction of full scale (−1.0…1.0).
    pub dc_offset: f32,
    /// Whether DC offset exceeds 1 % of full scale.
    pub has_dc_offset: bool,

    // --- Spectral ---
    /// Windows flagged by the spectral scan for excessive sibilance-band or
    /// plosive-band energy.
    pub spectral_violations: Vec<SpectralViolation>,

    // --- Temporal ---
    /// Contiguous silence blocks longer than the dead-air limit.
    pub dead_air_violations: Vec<DeadAirViolation>,
    /// Whether the head (first 1 s) is at room-tone level or below.
    pub head_ok: bool,
    /// Whether the tail (last 3 s) is at room-tone level or below.
    pub tail_ok: bool,
    /// Number of digital-zero runs (≥ 1 ms of consecutive zeros).
    pub digital_zero_runs: usize,

    // --- LUFS / LRA ---
    /// Integrated LUFS per ITU-R BS.1770-4.
    pub integrated_lufs: f32,
    /// Loudness range in LU per EBU R 128.
    pub loudness_range: f32,
}
222
223/// Run a full diagnostic pass on raw L16-LE PCM bytes without modifying them.
224///
225/// Use this before [`process`] to decide whether repair is needed and to
226/// surface any quality issues to the caller.
227pub fn validate(pcm_bytes: &[u8], sample_rate: u32) -> Result<DiagnosticReport, AcxError> {
228    validate_with_config(pcm_bytes, sample_rate, &AcxConfig::default())
229}
230
231/// Like [`validate`] but with a custom [`AcxConfig`].
232pub fn validate_with_config(
233    pcm_bytes: &[u8],
234    sample_rate: u32,
235    cfg: &AcxConfig,
236) -> Result<DiagnosticReport, AcxError> {
237    if pcm_bytes.is_empty() {
238        return Err(AcxError::EmptyInput);
239    }
240
241    let samples = bytes_to_samples(pcm_bytes)?;
242
243    let acx = analyse::analyse(&samples, sample_rate, cfg);
244    let dc = dc_offset::measure(&samples);
245    let spectral_violations = spectral::scan(&samples, sample_rate);
246    let dead_air_violations =
247        temporal::detect_dead_air(&samples, sample_rate, cfg.silence_threshold_db);
248    let (head_ok, tail_ok) = temporal::check_bookends(&samples, sample_rate);
249    let digital_zero_runs = temporal::count_digital_zero_runs(&samples);
250    let il = lufs::integrated_lufs(&samples, sample_rate);
251    let lr = lufs::loudness_range(&samples, sample_rate);
252
253    Ok(DiagnosticReport {
254        rms_db: acx.rms_db,
255        peak_db: acx.peak_db,
256        noise_floor_db: acx.noise_floor_db,
257        acx_compliant: acx.compliant,
258        dc_offset: dc,
259        has_dc_offset: dc_offset::has_offset(&samples),
260        spectral_violations,
261        dead_air_violations,
262        head_ok,
263        tail_ok,
264        digital_zero_runs,
265        integrated_lufs: il,
266        loudness_range: lr,
267    })
268}
269
270/// Convert raw L16-LE bytes to i16 samples.
271///
272/// Returns `Err(AcxError::OddByteLength)` if `bytes.len()` is not even.
273pub fn bytes_to_samples(bytes: &[u8]) -> Result<Vec<i16>, AcxError> {
274    if bytes.len() % 2 != 0 {
275        return Err(AcxError::OddByteLength);
276    }
277    Ok(bytes
278        .chunks_exact(2)
279        .map(|c| i16::from_le_bytes([c[0], c[1]]))
280        .collect())
281}
282
/// Encode `i16` samples as raw little-endian 16-bit PCM bytes.
///
/// The output is exactly `2 * samples.len()` bytes, low byte first for each sample.
pub fn samples_to_bytes(samples: &[i16]) -> Vec<u8> {
    let mut encoded = Vec::with_capacity(samples.len() * 2);
    for sample in samples {
        encoded.extend_from_slice(&sample.to_le_bytes());
    }
    encoded
}
287
288/// Run the full ACX post-processing pipeline on raw L16-LE PCM bytes.
289///
290/// # Errors
291///
292/// Returns [`AcxError::OddByteLength`] if input length is odd.
293/// Returns [`AcxError::EmptyInput`] if input is empty.
294/// Returns [`AcxError::StillNonCompliant`] if the audio cannot be brought into
295/// compliance after all processing steps (extremely rare — indicates a segment
296/// that is almost entirely silence).
297pub fn process(pcm_bytes: &[u8], sample_rate: u32) -> Result<Vec<u8>, AcxError> {
298    process_with_config(pcm_bytes, sample_rate, &AcxConfig::default())
299}
300
301/// Like [`process`] but with a custom [`AcxConfig`].
302pub fn process_with_config(
303    pcm_bytes: &[u8],
304    sample_rate: u32,
305    cfg: &AcxConfig,
306) -> Result<Vec<u8>, AcxError> {
307    if pcm_bytes.is_empty() {
308        return Err(AcxError::EmptyInput);
309    }
310
311    let mut samples = bytes_to_samples(pcm_bytes)?;
312
313    // 1. Click suppression — remove sub-10 ms transient spikes before any
314    //    spectral processing (clicks would smear across STFT frames).
315    if cfg.click_suppression_enabled {
316        click::suppress_clicks(&mut samples, sample_rate);
317    }
318
319    // 2. DC offset removal — before any gain changes to avoid skewing RMS target.
320    if dc_offset::has_offset(&samples) {
321        dc_offset::remove(&mut samples);
322    }
323
324    // 3. Noise reduction — profile from the head of the signal (pre-processing),
325    //    then subtract spectrally across all frames.
326    if cfg.denoise_enabled {
327        denoise::denoise_with_params(
328            &mut samples,
329            sample_rate,
330            cfg.denoise_profile_ms,
331            cfg.denoise_oversubtraction,
332            cfg.denoise_spectral_floor,
333        );
334    }
335
336    // 4. Warmth EQ — shape the spectrum before spectral cleanup so the de-esser
337    //    and plosive suppressor see the boosted spectrum and compensate correctly.
338    if cfg.eq_enabled {
339        eq::apply_warmth_with_params(
340            &mut samples,
341            sample_rate,
342            cfg.eq_low_shelf_db,
343            eq::DEFAULT_LOW_SHELF_HZ,
344            cfg.eq_high_shelf_db,
345            eq::DEFAULT_HIGH_SHELF_HZ,
346        );
347    }
348
349    // 5 & 6. Spectral cleanup BEFORE normalisation so the level-setting step sees
350    //     the final spectral shape.  De-essing and plosive suppression are
351    //     amplitude-agnostic (they operate on energy ratios); running them here
352    //     avoids a second limiter pass and keeps the RMS target accurate.
353    if cfg.deess_enabled {
354        deess::deess_with_params(
355            &mut samples,
356            sample_rate,
357            cfg.deess_threshold_ratio,
358            cfg.deess_max_reduction_db,
359        );
360    }
361    if cfg.plosive_suppression_enabled {
362        plosive::suppress_plosives_with_attenuation(
363            &mut samples,
364            sample_rate,
365            cfg.plosive_attenuation_db,
366        );
367    }
368
369    // 7a. Multiband compression BEFORE normalise — so the normaliser sees the
370    //     compressed spectral energy and can reach the RMS target accurately.
371    //     Running after normalise would undercut the target by the compression gain.
372    if cfg.multiband_enabled {
373        multiband::compress(&mut samples, sample_rate);
374    }
375
376    // 7. Normalise to target RMS — pre-compensated for bookend energy loss.
377    //
378    // pad_bookends (step 3) replaces HEAD + TAIL with room tone at room_tone_db,
379    // diluting the overall RMS.  We adjust the normalise target so the FINAL
380    // post-pad RMS equals cfg.rms_target_db.
381    //
382    // Derivation: after pad_bookends the middle portion has been scaled by the
383    // normalise gain g (= N_linear / R_in), so its RMS is g·R_middle.
384    // Energy conservation:
385    //   target² = (S-B)/S · (g·R_middle)²  +  B/S · tone²
386    //   g        = sqrt[(target² - B/S·tone²) · S / ((S-B)·R_middle²)]
387    //   N        = g · R_in
388    //            = target · sqrt[S/(S-B)] · (R_in/R_middle)   [tone² negligible]
389    //   N_db     = target_db + 10·log₁₀(S/(S-B)) + (R_in_db − R_middle_db)
390    //
391    // If the bookend regions are already silence, R_in < R_middle, the ratio
392    // term is negative and exactly cancels the S/(S-B) boost — no compensation
393    // is needed and the formula self-corrects.
394    let normalise_target = {
395        let head_s =
396            (sample_rate as usize * temporal::HEAD_DURATION.whole_milliseconds() as usize) / 1000;
397        let tail_s =
398            (sample_rate as usize * temporal::TAIL_DURATION.whole_milliseconds() as usize) / 1000;
399        let speech_start = head_s.min(samples.len());
400        let speech_end = samples.len().saturating_sub(tail_s).max(speech_start);
401
402        if speech_start < speech_end {
403            let overall_rms_db = analyse::rms_db(&samples);
404            let middle_rms_db = analyse::rms_db(&samples[speech_start..speech_end]);
405            let s = samples.len() as f32;
406            let b = (head_s + tail_s).min(samples.len()) as f32;
407            // N = target + 10·log10(S/(S-B)) + (R_overall - R_middle)
408            cfg.rms_target_db + 10.0 * (s / (s - b)).log10() + (overall_rms_db - middle_rms_db)
409        } else {
410            cfg.rms_target_db
411        }
412    };
413    normalise::normalise(&mut samples, normalise_target);
414
415    // 9. Brickwall peak limiter — safety net after compression; also absorbs any
416    //    peaks introduced by OLA or EQ processing.
417    limiter::limit(&mut samples, sample_rate, cfg.peak_ceiling_db);
418
419    // 10. Breath removal after normalisation so room-tone fill sits at the correct
420    //     level relative to the normalised signal.
421    if cfg.breath_removal_enabled {
422        breath::remove_breaths(&mut samples, sample_rate, cfg.room_tone_db);
423    }
424
425    // 11. Pause normalisation — cap over-long TTS pauses before the gate runs,
426    //     so the gate fills correctly-sized silence windows.
427    if cfg.pause_norm_enabled {
428        samples = pause_norm::normalize_pauses_with_targets(
429            &samples,
430            sample_rate,
431            cfg.pause_sentence_target_ms,
432            cfg.pause_paragraph_target_ms,
433            cfg.pause_scene_target_ms,
434        );
435    }
436
437    // 12. Gate sub-threshold windows → room tone + enforce head/tail bookends
438    let tone_samples = sample_rate as usize / 2; // 500 ms is enough to tile from
439    let tone = room_tone::generate_room_tone(tone_samples, cfg.room_tone_db);
440    gate::gate_to_room_tone(&mut samples, sample_rate, cfg.silence_threshold_db, &tone);
441    gate::pad_bookends(&mut samples, sample_rate, &tone);
442
443    // 14. Verify
444    let report = analyse::analyse(&samples, sample_rate, cfg);
445    if !report.compliant {
446        return Err(AcxError::StillNonCompliant {
447            rms_db: report.rms_db,
448            rms_min: cfg.rms_min_db,
449            rms_max: cfg.rms_max_db,
450            peak_db: report.peak_db,
451            peak_ceiling: cfg.peak_ceiling_db,
452            noise_floor_db: report.noise_floor_db,
453            noise_floor_max: cfg.noise_floor_max_db,
454        });
455    }
456
457    Ok(samples_to_bytes(&samples))
458}
459
#[cfg(test)]
mod tests {
    use super::*;

    /// Sample rate shared by every fixture in this module.
    const SAMPLE_RATE: u32 = 24_000;

    /// Convert a float sample to `i16`, saturating at the type bounds.
    fn quantise(value: f32) -> i16 {
        value.clamp(i16::MIN as f32, i16::MAX as f32) as i16
    }

    /// Pure sine tone of `duration_secs` seconds at `freq_hz`.
    fn sine_wave(freq_hz: f32, duration_secs: f32, amplitude: f32, sample_rate: u32) -> Vec<i16> {
        let len = (sample_rate as f32 * duration_secs) as usize;
        let mut wave = Vec::with_capacity(len);
        for i in 0..len {
            let t = i as f32 / sample_rate as f32;
            wave.push(quantise(
                amplitude * (2.0 * std::f32::consts::PI * freq_hz * t).sin(),
            ));
        }
        wave
    }

    /// Encode samples as L16-LE bytes.
    fn to_bytes(samples: &[i16]) -> Vec<u8> {
        samples_to_bytes(samples)
    }

    /// Run the default pipeline over `samples` and decode the result.
    fn process_default(samples: &[i16]) -> Vec<i16> {
        let processed = process(&to_bytes(samples), SAMPLE_RATE).unwrap();
        bytes_to_samples(&processed).unwrap()
    }

    /// Alternating 300 ms sine + 50 ms silence — mimics narration pauses.
    fn speech_like(amplitude: f32, total_secs: f32, sample_rate: u32) -> Vec<i16> {
        const SPEECH_MS: usize = 300;
        const PAUSE_MS: usize = 50;

        let period_len = (sample_rate as usize * (SPEECH_MS + PAUSE_MS)) / 1000;
        let total_len = (sample_rate as f32 * total_secs) as usize;
        let voiced_len = (sample_rate as usize * SPEECH_MS) / 1000;

        (0..total_len)
            .map(|i| {
                // The tone's phase restarts at the top of every period.
                let pos = i % period_len;
                if pos < voiced_len {
                    let phase_t = pos as f32 / sample_rate as f32;
                    quantise(amplitude * (2.0 * std::f32::consts::PI * 440.0 * phase_t).sin())
                } else {
                    0i16
                }
            })
            .collect()
    }

    #[test]
    fn normalise_brings_quiet_track_into_window() {
        // 10 s: pad_bookends consumes 1 s head + 3 s tail, leaving 6 s of speech.
        let quiet = speech_like(1000.0, 10.0, SAMPLE_RATE);
        let repaired = process_default(&quiet);
        let report = analyse::analyse(&repaired, SAMPLE_RATE, &AcxConfig::default());
        assert!(
            (-23.0..=-18.0).contains(&report.rms_db),
            "RMS out of ACX window: {:.1} dB",
            report.rms_db
        );
    }

    #[test]
    fn limiter_prevents_clipping() {
        // Near-full-scale input must come out under the −3 dB ceiling.
        let hot = speech_like(i16::MAX as f32 * 0.99, 10.0, SAMPLE_RATE);
        let repaired = process_default(&hot);
        let report = analyse::analyse(&repaired, SAMPLE_RATE, &AcxConfig::default());
        assert!(
            report.peak_db <= -3.0,
            "Peak exceeded ACX ceiling: {:.1} dB",
            report.peak_db
        );
    }

    #[test]
    fn gate_replaces_digital_silence() {
        // 1 s zeros + 8 s speech = 9 s total; pad_bookends takes 4 s leaving 5 s of speech.
        let input: Vec<i16> = std::iter::repeat(0i16)
            .take(SAMPLE_RATE as usize)
            .chain(sine_wave(440.0, 8.0, 3000.0, SAMPLE_RATE))
            .collect();
        let repaired = process_default(&input);
        let leading_second = &repaired[..SAMPLE_RATE as usize];
        let floor = analyse::noise_floor_db(leading_second, SAMPLE_RATE);
        assert!(floor > -144.0, "Gate did not replace digital silence");
    }

    #[test]
    fn odd_byte_length_returns_error() {
        let odd = [0u8; 101];
        let outcome = process(&odd, SAMPLE_RATE);
        assert!(matches!(outcome, Err(AcxError::OddByteLength)));
    }

    #[test]
    fn empty_input_returns_error() {
        let outcome = process(&[], SAMPLE_RATE);
        assert!(matches!(outcome, Err(AcxError::EmptyInput)));
    }

    #[test]
    fn room_tone_hits_target_db() {
        let tone = room_tone::generate_room_tone(SAMPLE_RATE as usize, -62.0);
        let measured = analyse::rms_db(&tone);
        let error_db = (measured - (-62.0)).abs();
        assert!(
            error_db < 1.5,
            "Room tone RMS {:.1} dB too far from −62 dB",
            measured
        );
    }

    #[test]
    fn analyse_report_is_accurate() {
        // A full-scale sine must be reported as non-compliant and over the ceiling.
        let cfg = AcxConfig::default();
        let full_scale = sine_wave(440.0, 2.0, i16::MAX as f32, SAMPLE_RATE);
        let report = analyse::analyse(&full_scale, SAMPLE_RATE, &cfg);
        assert!(!report.compliant);
        assert!(report.peak_db > cfg.peak_ceiling_db);
    }

    #[test]
    fn validate_detects_dc_offset() {
        // Shift all samples by +1000 — ~3 % of full scale
        let mut shifted = speech_like(2000.0, 2.0, SAMPLE_RATE);
        for sample in shifted.iter_mut() {
            *sample = sample.saturating_add(1000);
        }
        let report = validate(&to_bytes(&shifted), SAMPLE_RATE).unwrap();
        assert!(report.has_dc_offset, "Expected DC offset to be detected");
    }

    #[test]
    fn validate_returns_lufs_for_speech_signal() {
        let speech = speech_like(3000.0, 5.0, SAMPLE_RATE);
        let report = validate(&to_bytes(&speech), SAMPLE_RATE).unwrap();
        // LUFS should be a finite negative number for audible content
        assert!(
            report.integrated_lufs < 0.0 && report.integrated_lufs > -144.0,
            "Unexpected LUFS: {:.1}",
            report.integrated_lufs
        );
    }

    #[test]
    fn process_removes_dc_before_normalise() {
        // A DC-shifted signal should still produce a compliant output
        let mut shifted = speech_like(1000.0, 10.0, SAMPLE_RATE);
        for sample in shifted.iter_mut() {
            *sample = sample.saturating_add(500);
        }
        let repaired = process_default(&shifted);
        // DC offset should be gone
        let residual = dc_offset::measure(&repaired).abs();
        assert!(
            residual < dc_offset::DC_OFFSET_THRESHOLD,
            "DC offset remains after processing"
        );
    }
}
audiobook_creation_exchange/lib.rs

audiobook_creation_exchange/
lib.rs