1pub mod analyse;
34pub mod bitstream;
35pub mod breath;
36pub mod click;
37pub mod consistency;
38pub mod crossfade;
39pub mod dc_offset;
40pub mod deess;
41pub mod denoise;
42pub mod eq;
43pub mod error;
44pub mod gate;
45pub mod limiter;
46pub mod loudness_preset;
47pub mod lufs;
48pub mod multiband;
49pub mod normalise;
50pub mod pause_norm;
51pub mod plosive;
52pub mod room_tone;
53pub mod spectral;
54pub mod temporal;
55
56pub use analyse::{AcxReport, analyse};
57pub use bitstream::{CbrReport, Id3Report, check_cbr, check_id3_tags};
58pub use consistency::{ConsistencyReport, consistency_check};
59pub use crossfade::crossfade;
60pub use error::AcxError;
61pub use loudness_preset::LoudnessPreset;
62pub use lufs::{LufsReport, integrated_lufs, loudness_range};
63pub use multiband::MultibandParams;
64pub use spectral::{SpectralViolation, SpectralViolationKind, scan as spectral_scan};
65pub use temporal::{
66 DeadAirViolation, check_bookends, count_digital_zero_runs, detect_dead_air, max_dead_air,
67};
68
69#[derive(Debug, Clone)]
76pub struct AcxConfig {
77 pub rms_target_db: f32,
79 pub rms_min_db: f32,
81 pub rms_max_db: f32,
83 pub peak_ceiling_db: f32,
85 pub noise_floor_max_db: f32,
87 pub silence_threshold_db: f32,
89 pub room_tone_db: f32,
91 pub dead_air_limit: time::Duration,
93 pub sibilance_ratio_threshold: f32,
95 pub plosive_ratio_threshold: f32,
97
98 pub click_suppression_enabled: bool,
101 pub denoise_enabled: bool,
107 pub denoise_profile_ms: u32,
109 pub denoise_oversubtraction: f32,
111 pub denoise_spectral_floor: f32,
113 pub eq_enabled: bool,
115 pub eq_low_shelf_db: f32,
117 pub eq_high_shelf_db: f32,
119 pub deess_enabled: bool,
121 pub deess_threshold_ratio: f32,
123 pub deess_max_reduction_db: f32,
125 pub plosive_suppression_enabled: bool,
127 pub plosive_attenuation_db: f32,
129 pub multiband_enabled: bool,
131 pub breath_removal_enabled: bool,
134 pub pause_norm_enabled: bool,
136 pub pause_sentence_target_ms: u32,
138 pub pause_paragraph_target_ms: u32,
140 pub pause_scene_target_ms: u32,
142}
143
144impl Default for AcxConfig {
145 fn default() -> Self {
146 Self {
147 rms_target_db: -20.5,
148 rms_min_db: -23.0,
149 rms_max_db: -18.0,
150 peak_ceiling_db: -3.0,
151 noise_floor_max_db: -60.0,
152 silence_threshold_db: -65.0,
153 room_tone_db: -62.0, dead_air_limit: temporal::DEAD_AIR_LIMIT,
155 sibilance_ratio_threshold: spectral::SIBILANCE_RATIO_THRESHOLD,
156 plosive_ratio_threshold: spectral::PLOSIVE_RATIO_THRESHOLD,
157 click_suppression_enabled: true,
158 denoise_enabled: false,
159 denoise_profile_ms: denoise::DEFAULT_PROFILE_MS,
160 denoise_oversubtraction: denoise::DEFAULT_OVERSUBTRACTION,
161 denoise_spectral_floor: denoise::DEFAULT_SPECTRAL_FLOOR,
162 eq_enabled: true,
163 eq_low_shelf_db: eq::DEFAULT_LOW_SHELF_DB,
164 eq_high_shelf_db: eq::DEFAULT_HIGH_SHELF_DB,
165 deess_enabled: true,
166 deess_threshold_ratio: deess::DEFAULT_THRESHOLD_RATIO,
167 deess_max_reduction_db: deess::DEFAULT_MAX_REDUCTION_DB,
168 plosive_suppression_enabled: true,
169 plosive_attenuation_db: plosive::DEFAULT_ATTENUATION_DB,
170 multiband_enabled: true,
171 breath_removal_enabled: false,
172 pause_norm_enabled: true,
173 pause_sentence_target_ms: pause_norm::DEFAULT_SENTENCE_TARGET_MS,
174 pause_paragraph_target_ms: pause_norm::DEFAULT_PARAGRAPH_TARGET_MS,
175 pause_scene_target_ms: pause_norm::DEFAULT_SCENE_TARGET_MS,
176 }
177 }
178}
179
180#[derive(Debug, Clone)]
185pub struct DiagnosticReport {
186 pub rms_db: f32,
189 pub peak_db: f32,
191 pub noise_floor_db: f32,
193 pub acx_compliant: bool,
195
196 pub dc_offset: f32,
199 pub has_dc_offset: bool,
201
202 pub spectral_violations: Vec<SpectralViolation>,
205
206 pub dead_air_violations: Vec<DeadAirViolation>,
209 pub head_ok: bool,
211 pub tail_ok: bool,
213 pub digital_zero_runs: usize,
215
216 pub integrated_lufs: f32,
219 pub loudness_range: f32,
221}
222
223pub fn validate(pcm_bytes: &[u8], sample_rate: u32) -> Result<DiagnosticReport, AcxError> {
228 validate_with_config(pcm_bytes, sample_rate, &AcxConfig::default())
229}
230
231pub fn validate_with_config(
233 pcm_bytes: &[u8],
234 sample_rate: u32,
235 cfg: &AcxConfig,
236) -> Result<DiagnosticReport, AcxError> {
237 if pcm_bytes.is_empty() {
238 return Err(AcxError::EmptyInput);
239 }
240
241 let samples = bytes_to_samples(pcm_bytes)?;
242
243 let acx = analyse::analyse(&samples, sample_rate, cfg);
244 let dc = dc_offset::measure(&samples);
245 let spectral_violations = spectral::scan(&samples, sample_rate);
246 let dead_air_violations =
247 temporal::detect_dead_air(&samples, sample_rate, cfg.silence_threshold_db);
248 let (head_ok, tail_ok) = temporal::check_bookends(&samples, sample_rate);
249 let digital_zero_runs = temporal::count_digital_zero_runs(&samples);
250 let il = lufs::integrated_lufs(&samples, sample_rate);
251 let lr = lufs::loudness_range(&samples, sample_rate);
252
253 Ok(DiagnosticReport {
254 rms_db: acx.rms_db,
255 peak_db: acx.peak_db,
256 noise_floor_db: acx.noise_floor_db,
257 acx_compliant: acx.compliant,
258 dc_offset: dc,
259 has_dc_offset: dc_offset::has_offset(&samples),
260 spectral_violations,
261 dead_air_violations,
262 head_ok,
263 tail_ok,
264 digital_zero_runs,
265 integrated_lufs: il,
266 loudness_range: lr,
267 })
268}
269
270pub fn bytes_to_samples(bytes: &[u8]) -> Result<Vec<i16>, AcxError> {
274 if bytes.len() % 2 != 0 {
275 return Err(AcxError::OddByteLength);
276 }
277 Ok(bytes
278 .chunks_exact(2)
279 .map(|c| i16::from_le_bytes([c[0], c[1]]))
280 .collect())
281}
282
/// Encodes `i16` samples as bytes, two little-endian bytes per sample, in input order.
pub fn samples_to_bytes(samples: &[i16]) -> Vec<u8> {
    let mut bytes = Vec::with_capacity(samples.len() * 2);
    for sample in samples {
        bytes.extend_from_slice(&sample.to_le_bytes());
    }
    bytes
}
287
288pub fn process(pcm_bytes: &[u8], sample_rate: u32) -> Result<Vec<u8>, AcxError> {
298 process_with_config(pcm_bytes, sample_rate, &AcxConfig::default())
299}
300
301pub fn process_with_config(
303 pcm_bytes: &[u8],
304 sample_rate: u32,
305 cfg: &AcxConfig,
306) -> Result<Vec<u8>, AcxError> {
307 if pcm_bytes.is_empty() {
308 return Err(AcxError::EmptyInput);
309 }
310
311 let mut samples = bytes_to_samples(pcm_bytes)?;
312
313 if cfg.click_suppression_enabled {
316 click::suppress_clicks(&mut samples, sample_rate);
317 }
318
319 if dc_offset::has_offset(&samples) {
321 dc_offset::remove(&mut samples);
322 }
323
324 if cfg.denoise_enabled {
327 denoise::denoise_with_params(
328 &mut samples,
329 sample_rate,
330 cfg.denoise_profile_ms,
331 cfg.denoise_oversubtraction,
332 cfg.denoise_spectral_floor,
333 );
334 }
335
336 if cfg.eq_enabled {
339 eq::apply_warmth_with_params(
340 &mut samples,
341 sample_rate,
342 cfg.eq_low_shelf_db,
343 eq::DEFAULT_LOW_SHELF_HZ,
344 cfg.eq_high_shelf_db,
345 eq::DEFAULT_HIGH_SHELF_HZ,
346 );
347 }
348
349 if cfg.deess_enabled {
354 deess::deess_with_params(
355 &mut samples,
356 sample_rate,
357 cfg.deess_threshold_ratio,
358 cfg.deess_max_reduction_db,
359 );
360 }
361 if cfg.plosive_suppression_enabled {
362 plosive::suppress_plosives_with_attenuation(
363 &mut samples,
364 sample_rate,
365 cfg.plosive_attenuation_db,
366 );
367 }
368
369 if cfg.multiband_enabled {
373 multiband::compress(&mut samples, sample_rate);
374 }
375
376 let normalise_target = {
395 let head_s =
396 (sample_rate as usize * temporal::HEAD_DURATION.whole_milliseconds() as usize) / 1000;
397 let tail_s =
398 (sample_rate as usize * temporal::TAIL_DURATION.whole_milliseconds() as usize) / 1000;
399 let speech_start = head_s.min(samples.len());
400 let speech_end = samples.len().saturating_sub(tail_s).max(speech_start);
401
402 if speech_start < speech_end {
403 let overall_rms_db = analyse::rms_db(&samples);
404 let middle_rms_db = analyse::rms_db(&samples[speech_start..speech_end]);
405 let s = samples.len() as f32;
406 let b = (head_s + tail_s).min(samples.len()) as f32;
407 cfg.rms_target_db + 10.0 * (s / (s - b)).log10() + (overall_rms_db - middle_rms_db)
409 } else {
410 cfg.rms_target_db
411 }
412 };
413 normalise::normalise(&mut samples, normalise_target);
414
415 limiter::limit(&mut samples, sample_rate, cfg.peak_ceiling_db);
418
419 if cfg.breath_removal_enabled {
422 breath::remove_breaths(&mut samples, sample_rate, cfg.room_tone_db);
423 }
424
425 if cfg.pause_norm_enabled {
428 samples = pause_norm::normalize_pauses_with_targets(
429 &samples,
430 sample_rate,
431 cfg.pause_sentence_target_ms,
432 cfg.pause_paragraph_target_ms,
433 cfg.pause_scene_target_ms,
434 );
435 }
436
437 let tone_samples = sample_rate as usize / 2; let tone = room_tone::generate_room_tone(tone_samples, cfg.room_tone_db);
440 gate::gate_to_room_tone(&mut samples, sample_rate, cfg.silence_threshold_db, &tone);
441 gate::pad_bookends(&mut samples, sample_rate, &tone);
442
443 let report = analyse::analyse(&samples, sample_rate, cfg);
445 if !report.compliant {
446 return Err(AcxError::StillNonCompliant {
447 rms_db: report.rms_db,
448 rms_min: cfg.rms_min_db,
449 rms_max: cfg.rms_max_db,
450 peak_db: report.peak_db,
451 peak_ceiling: cfg.peak_ceiling_db,
452 noise_floor_db: report.noise_floor_db,
453 noise_floor_max: cfg.noise_floor_max_db,
454 });
455 }
456
457 Ok(samples_to_bytes(&samples))
458}
459
#[cfg(test)]
mod tests {
    use super::*;
    use std::f32::consts::PI;

    const SAMPLE_RATE: u32 = 24_000;

    /// Clamps a float sample to the `i16` range and truncates it to an integer.
    fn quantise(value: f32) -> i16 {
        value.clamp(i16::MIN as f32, i16::MAX as f32) as i16
    }

    /// Generates `duration_secs` of a sine tone at `freq_hz` with the given peak amplitude.
    fn sine_wave(freq_hz: f32, duration_secs: f32, amplitude: f32, sample_rate: u32) -> Vec<i16> {
        let len = (sample_rate as f32 * duration_secs) as usize;
        let mut wave = Vec::with_capacity(len);
        for idx in 0..len {
            let secs = idx as f32 / sample_rate as f32;
            wave.push(quantise(amplitude * (2.0 * PI * freq_hz * secs).sin()));
        }
        wave
    }

    fn to_bytes(samples: &[i16]) -> Vec<u8> {
        samples_to_bytes(samples)
    }

    /// Generates repeating bursts of a 440 Hz tone (300 ms) separated by digital silence
    /// (50 ms); the tone phase restarts at the beginning of every burst.
    fn speech_like(amplitude: f32, total_secs: f32, sample_rate: u32) -> Vec<i16> {
        let burst_ms = 300usize;
        let gap_ms = 50usize;
        let period_len = (sample_rate as usize * (burst_ms + gap_ms)) / 1000;
        let total_len = (sample_rate as f32 * total_secs) as usize;
        let burst_len = (sample_rate as usize * burst_ms) / 1000;

        (0..total_len)
            .map(|idx| {
                let pos = idx % period_len;
                if pos >= burst_len {
                    return 0i16;
                }
                let secs = pos as f32 / sample_rate as f32;
                quantise(amplitude * (2.0 * PI * 440.0 * secs).sin())
            })
            .collect()
    }

    /// Adds a constant bias to every sample, saturating at the `i16` limits.
    fn with_dc_bias(mut samples: Vec<i16>, bias: i16) -> Vec<i16> {
        for sample in &mut samples {
            *sample = sample.saturating_add(bias);
        }
        samples
    }

    /// Runs `process` with the default config and decodes the result back to samples.
    fn process_to_samples(input: &[i16]) -> Vec<i16> {
        let processed = process(&to_bytes(input), SAMPLE_RATE).unwrap();
        bytes_to_samples(&processed).unwrap()
    }

    #[test]
    fn normalise_brings_quiet_track_into_window() {
        let quiet = speech_like(1000.0, 10.0, SAMPLE_RATE);
        let processed = process_to_samples(&quiet);
        let report = analyse::analyse(&processed, SAMPLE_RATE, &AcxConfig::default());
        assert!(
            (-23.0..=-18.0).contains(&report.rms_db),
            "RMS out of ACX window: {:.1} dB",
            report.rms_db
        );
    }

    #[test]
    fn limiter_prevents_clipping() {
        let hot = speech_like(i16::MAX as f32 * 0.99, 10.0, SAMPLE_RATE);
        let processed = process_to_samples(&hot);
        let report = analyse::analyse(&processed, SAMPLE_RATE, &AcxConfig::default());
        assert!(
            report.peak_db <= -3.0,
            "Peak exceeded ACX ceiling: {:.1} dB",
            report.peak_db
        );
    }

    #[test]
    fn gate_replaces_digital_silence() {
        // One second of digital zero followed by eight seconds of tone.
        let one_second = SAMPLE_RATE as usize;
        let input: Vec<i16> = std::iter::repeat(0i16)
            .take(one_second)
            .chain(sine_wave(440.0, 8.0, 3000.0, SAMPLE_RATE))
            .collect();
        let processed = process_to_samples(&input);
        let leading_second = &processed[..one_second];
        let floor = analyse::noise_floor_db(leading_second, SAMPLE_RATE);
        assert!(floor > -144.0, "Gate did not replace digital silence");
    }

    #[test]
    fn odd_byte_length_returns_error() {
        let odd = vec![0u8; 101];
        let outcome = process(&odd, SAMPLE_RATE);
        assert!(matches!(outcome, Err(AcxError::OddByteLength)));
    }

    #[test]
    fn empty_input_returns_error() {
        let outcome = process(&[], SAMPLE_RATE);
        assert!(matches!(outcome, Err(AcxError::EmptyInput)));
    }

    #[test]
    fn room_tone_hits_target_db() {
        let tone = room_tone::generate_room_tone(SAMPLE_RATE as usize, -62.0);
        let measured = analyse::rms_db(&tone);
        assert!(
            (measured - (-62.0)).abs() < 1.5,
            "Room tone RMS {:.1} dB too far from −62 dB",
            measured
        );
    }

    #[test]
    fn analyse_report_is_accurate() {
        // A full-scale sine must fail compliance on peak level.
        let full_scale = sine_wave(440.0, 2.0, i16::MAX as f32, SAMPLE_RATE);
        let cfg = AcxConfig::default();
        let report = analyse::analyse(&full_scale, SAMPLE_RATE, &cfg);
        assert!(!report.compliant);
        assert!(report.peak_db > cfg.peak_ceiling_db);
    }

    #[test]
    fn validate_detects_dc_offset() {
        let biased = with_dc_bias(speech_like(2000.0, 2.0, SAMPLE_RATE), 1000);
        let report = validate(&to_bytes(&biased), SAMPLE_RATE).unwrap();
        assert!(report.has_dc_offset, "Expected DC offset to be detected");
    }

    #[test]
    fn validate_returns_lufs_for_speech_signal() {
        let signal = speech_like(3000.0, 5.0, SAMPLE_RATE);
        let report = validate(&to_bytes(&signal), SAMPLE_RATE).unwrap();
        assert!(
            report.integrated_lufs < 0.0 && report.integrated_lufs > -144.0,
            "Unexpected LUFS: {:.1}",
            report.integrated_lufs
        );
    }

    #[test]
    fn process_removes_dc_before_normalise() {
        let biased = with_dc_bias(speech_like(1000.0, 10.0, SAMPLE_RATE), 500);
        let processed = process_to_samples(&biased);
        assert!(
            dc_offset::measure(&processed).abs() < dc_offset::DC_OFFSET_THRESHOLD,
            "DC offset remains after processing"
        );
    }
}