use crate::common::{FRAME_DURATION_MS, MIN_LEVEL_DBFS, VAD_CONFIDENCE_THRESHOLD};
use crate::saturation_protector_buffer::SaturationProtectorBuffer;
/// Accumulation time, in milliseconds, that `time_since_push_ms` must exceed
/// before the running peak maximum is pushed into the peak delay buffer.
const PEAK_ENVELOPER_SUPER_FRAME_LENGTH_MS: i32 = 400;
/// Lower bound the headroom estimate is clamped to after every update.
const MIN_MARGIN_DB: f32 = 12.0;
/// Upper bound the headroom estimate is clamped to after every update.
const MAX_MARGIN_DB: f32 = 25.0;
/// Weight kept on the previous headroom when the peak-to-speech-level
/// difference is above it, i.e. when the headroom is being raised.
const ATTACK: f32 = 0.998_849_4;
/// Weight kept on the previous headroom when the peak-to-speech-level
/// difference is not above it, i.e. when the headroom is being lowered.
const DECAY: f32 = 0.999_769_75;
/// Headroom estimation state, grouped in one cloneable struct so that
/// `SaturationProtector` can copy it wholesale between its preliminary and
/// reliable states.
#[derive(Clone, Debug, PartialEq)]
struct SaturationProtectorState {
/// Current headroom estimate, kept within `[MIN_MARGIN_DB, MAX_MARGIN_DB]`
/// by `update_saturation_protector_state`.
headroom_db: f32,
/// Buffer receiving one peak maximum per elapsed accumulation window.
peak_delay_buffer: SaturationProtectorBuffer,
/// Largest peak observed since the last push into `peak_delay_buffer`.
max_peaks_dbfs: f32,
/// Time accumulated, in milliseconds, since the last push into `peak_delay_buffer`.
time_since_push_ms: i32,
}
fn reset_saturation_protector_state(
initial_headroom_db: f32,
state: &mut SaturationProtectorState,
) {
state.headroom_db = initial_headroom_db;
state.peak_delay_buffer.reset();
state.max_peaks_dbfs = MIN_LEVEL_DBFS;
state.time_since_push_ms = 0;
}
/// Feeds one frame into `state` and refreshes its headroom estimate.
///
/// * `peak_dbfs` - peak level of the frame; folded into the running maximum.
/// * `speech_level_dbfs` - speech level the reference peak is compared against.
/// * `state` - estimator state, updated in place.
fn update_saturation_protector_state(
    peak_dbfs: f32,
    speech_level_dbfs: f32,
    state: &mut SaturationProtectorState,
) {
    // Track the largest peak of the current accumulation window.
    state.max_peaks_dbfs = state.max_peaks_dbfs.max(peak_dbfs);
    state.time_since_push_ms += FRAME_DURATION_MS;

    // Once the window has elapsed, hand its maximum to the delay buffer and
    // start a new window.
    let window_elapsed = state.time_since_push_ms > PEAK_ENVELOPER_SUPER_FRAME_LENGTH_MS;
    if window_elapsed {
        let window_peak_dbfs = state.max_peaks_dbfs;
        state.peak_delay_buffer.push_back(window_peak_dbfs);
        state.max_peaks_dbfs = MIN_LEVEL_DBFS;
        state.time_since_push_ms = 0;
    }

    // Use the buffer's front value as the reference peak; when the buffer has
    // nothing to offer, fall back to the running maximum.
    let reference_peak_dbfs = match state.peak_delay_buffer.front() {
        Some(delayed_peak_dbfs) => delayed_peak_dbfs,
        None => state.max_peaks_dbfs,
    };
    let gap_db = reference_peak_dbfs - speech_level_dbfs;

    // First-order smoothing towards the gap: `ATTACK` when the headroom has to
    // grow, `DECAY` otherwise.
    let previous_weight = if gap_db > state.headroom_db {
        ATTACK
    } else {
        DECAY
    };
    let smoothed_db = state.headroom_db * previous_weight + gap_db * (1.0 - previous_weight);
    state.headroom_db = smoothed_db.clamp(MIN_MARGIN_DB, MAX_MARGIN_DB);
}
/// Headroom estimator that only publishes a new headroom once enough adjacent
/// speech frames have been analyzed.
#[derive(Debug)]
pub struct SaturationProtector {
/// Headroom restored by `reset`.
initial_headroom_db: f32,
/// Number of adjacent speech frames required before the published headroom
/// is updated.
adjacent_speech_frames_threshold: i32,
/// Speech frames analyzed since the last non-speech frame (or reset).
num_adjacent_speech_frames: i32,
/// Headroom returned by `headroom_db()`.
headroom_db: f32,
/// State updated on every speech frame; rolled back to `reliable_state` when
/// a speech run ends before reaching the threshold.
preliminary_state: SaturationProtectorState,
/// Copy of `preliminary_state` taken when a long enough speech run ends.
reliable_state: SaturationProtectorState,
}
impl SaturationProtector {
    /// Creates a protector whose headroom starts at `initial_headroom_db`.
    ///
    /// `adjacent_speech_frames_threshold` is the number of adjacent speech
    /// frames [`analyze`](Self::analyze) must observe before it updates the
    /// value returned by [`headroom_db`](Self::headroom_db).
    pub fn new(initial_headroom_db: f32, adjacent_speech_frames_threshold: i32) -> Self {
        // Both states start out identical; build them from a single recipe.
        let fresh_state = || SaturationProtectorState {
            headroom_db: initial_headroom_db,
            peak_delay_buffer: SaturationProtectorBuffer::default(),
            max_peaks_dbfs: MIN_LEVEL_DBFS,
            time_since_push_ms: 0,
        };
        let mut protector = Self {
            initial_headroom_db,
            adjacent_speech_frames_threshold,
            num_adjacent_speech_frames: 0,
            headroom_db: initial_headroom_db,
            preliminary_state: fresh_state(),
            reliable_state: fresh_state(),
        };
        protector.reset();
        protector
    }

    /// Returns the most recently published headroom.
    pub fn headroom_db(&self) -> f32 {
        self.headroom_db
    }

    /// Analyzes one frame.
    ///
    /// * `speech_probability` - values below `VAD_CONFIDENCE_THRESHOLD` mark the
    ///   frame as non-speech; anything else is handled as speech.
    /// * `peak_dbfs` - peak level of the frame.
    /// * `speech_level_dbfs` - speech level the peaks are compared against.
    ///
    /// Speech frames update the preliminary state, and the published headroom
    /// follows it once the run of adjacent speech frames reaches the threshold.
    /// When the threshold is greater than one, the first non-speech frame after
    /// a run either confirms the preliminary state (run long enough) or rolls
    /// it back to the last confirmed state (run too short).
    pub fn analyze(&mut self, speech_probability: f32, peak_dbfs: f32, speech_level_dbfs: f32) {
        let required_frames = self.adjacent_speech_frames_threshold;

        // Keep the `<` comparison as is: a NaN probability fails it and is
        // therefore handled as a speech frame.
        if speech_probability < VAD_CONFIDENCE_THRESHOLD {
            if required_frames > 1 {
                if self.num_adjacent_speech_frames >= required_frames {
                    // The run was long enough: confirm the preliminary state.
                    self.reliable_state = self.preliminary_state.clone();
                } else if self.num_adjacent_speech_frames > 0 {
                    // The run was too short: discard the preliminary updates.
                    self.preliminary_state = self.reliable_state.clone();
                }
            }
            self.num_adjacent_speech_frames = 0;
            return;
        }

        // Saturate instead of `+= 1`: on an unbroken speech run the plain
        // increment would eventually overflow, panicking in debug builds and
        // wrapping to a negative count (which stops headroom updates) in
        // release builds.
        self.num_adjacent_speech_frames = self.num_adjacent_speech_frames.saturating_add(1);
        update_saturation_protector_state(
            peak_dbfs,
            speech_level_dbfs,
            &mut self.preliminary_state,
        );
        if self.num_adjacent_speech_frames >= required_frames {
            self.headroom_db = self.preliminary_state.headroom_db;
        }
    }

    /// Restores the initial headroom, clears the speech-frame counter and
    /// resets both internal states.
    pub fn reset(&mut self) {
        self.num_adjacent_speech_frames = 0;
        self.headroom_db = self.initial_headroom_db;
        reset_saturation_protector_state(self.initial_headroom_db, &mut self.preliminary_state);
        reset_saturation_protector_state(self.initial_headroom_db, &mut self.reliable_state);
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::common::FRAME_DURATION_MS;

    const INITIAL_HEADROOM_DB: f32 = 20.0;
    const NO_ADJACENT_SPEECH_FRAMES_REQUIRED: i32 = 1;
    const MAX_SPEECH_PROBABILITY: f32 = 1.0;

    /// Calls `analyze` `num_iterations` times with constant inputs and returns
    /// the largest absolute headroom change seen between consecutive calls.
    fn run_on_constant_level(
        num_iterations: i32,
        speech_probability: f32,
        peak_dbfs: f32,
        speech_level_dbfs: f32,
        saturation_protector: &mut SaturationProtector,
    ) -> f32 {
        let mut previous_headroom = saturation_protector.headroom_db();
        (0..num_iterations)
            .map(|_| {
                saturation_protector.analyze(speech_probability, peak_dbfs, speech_level_dbfs);
                let current_headroom = saturation_protector.headroom_db();
                let step = (current_headroom - previous_headroom).abs();
                previous_headroom = current_headroom;
                step
            })
            .fold(0.0_f32, f32::max)
    }

    /// Builds a protector with `threshold`, feeds it `num_frames` speech frames
    /// and returns `(headroom before, headroom after)`.
    fn headroom_before_and_after(threshold: i32, num_frames: i32) -> (f32, f32) {
        let mut protector = SaturationProtector::new(INITIAL_HEADROOM_DB, threshold);
        let before = protector.headroom_db();
        run_on_constant_level(num_frames, MAX_SPEECH_PROBABILITY, 0.0, -10.0, &mut protector);
        (before, protector.headroom_db())
    }

    /// One frame fewer than the threshold must leave the headroom untouched.
    fn do_not_adapt_to_short_speech_segments(threshold: i32) {
        let (initial_headroom_db, final_headroom_db) =
            headroom_before_and_after(threshold, threshold - 1);
        assert_eq!(initial_headroom_db, final_headroom_db);
    }

    /// One frame more than the threshold must move the headroom.
    fn adapt_to_enough_speech_segments(threshold: i32) {
        let (initial_headroom_db, final_headroom_db) =
            headroom_before_and_after(threshold, threshold + 1);
        assert_ne!(initial_headroom_db, final_headroom_db);
    }

    #[test]
    fn reset() {
        let mut protector =
            SaturationProtector::new(INITIAL_HEADROOM_DB, NO_ADJACENT_SPEECH_FRAMES_REQUIRED);
        let headroom_at_start = protector.headroom_db();

        run_on_constant_level(10, MAX_SPEECH_PROBABILITY, 0.0, -10.0, &mut protector);
        assert_ne!(headroom_at_start, protector.headroom_db());

        protector.reset();
        assert_eq!(headroom_at_start, protector.headroom_db());
    }

    #[test]
    fn estimates_crest_ratio() {
        const NUM_ITERATIONS: i32 = 2000;
        const PEAK_LEVEL_DBFS: f32 = -20.0;
        let crest_factor_db = INITIAL_HEADROOM_DB + 1.0;
        let speech_level_dbfs = PEAK_LEVEL_DBFS - crest_factor_db;
        let max_difference_db = 0.5 * (INITIAL_HEADROOM_DB - crest_factor_db).abs();

        let mut protector =
            SaturationProtector::new(INITIAL_HEADROOM_DB, NO_ADJACENT_SPEECH_FRAMES_REQUIRED);
        run_on_constant_level(
            NUM_ITERATIONS,
            MAX_SPEECH_PROBABILITY,
            PEAK_LEVEL_DBFS,
            speech_level_dbfs,
            &mut protector,
        );

        let estimation_error_db = (protector.headroom_db() - crest_factor_db).abs();
        assert!(
            estimation_error_db <= max_difference_db,
            "headroom {} should be near crest_factor {}",
            protector.headroom_db(),
            crest_factor_db
        );
    }

    #[test]
    fn change_slowly() {
        const NUM_ITERATIONS: i32 = 1000;
        const PEAK_LEVEL_DBFS: f32 = -20.0;
        let first_speech_level_dbfs = PEAK_LEVEL_DBFS - (INITIAL_HEADROOM_DB - 5.0);
        let second_speech_level_dbfs = PEAK_LEVEL_DBFS - INITIAL_HEADROOM_DB;

        let mut protector =
            SaturationProtector::new(INITIAL_HEADROOM_DB, NO_ADJACENT_SPEECH_FRAMES_REQUIRED);
        let first_phase_max_db = run_on_constant_level(
            NUM_ITERATIONS,
            MAX_SPEECH_PROBABILITY,
            PEAK_LEVEL_DBFS,
            first_speech_level_dbfs,
            &mut protector,
        );
        let second_phase_max_db = run_on_constant_level(
            NUM_ITERATIONS,
            MAX_SPEECH_PROBABILITY,
            PEAK_LEVEL_DBFS,
            second_speech_level_dbfs,
            &mut protector,
        );
        let max_difference_db = first_phase_max_db.max(second_phase_max_db);

        // Convert the per-second limit into a per-frame limit.
        let max_change_speed_db_per_second = 0.5;
        let max_change_db_per_frame =
            max_change_speed_db_per_second / 1000.0 * FRAME_DURATION_MS as f32;
        assert!(
            max_difference_db <= max_change_db_per_frame,
            "max_difference_db {max_difference_db} exceeds max change speed"
        );
    }

    #[test]
    fn do_not_adapt_to_short_speech_segments_threshold_2() {
        do_not_adapt_to_short_speech_segments(2);
    }

    #[test]
    fn do_not_adapt_to_short_speech_segments_threshold_9() {
        do_not_adapt_to_short_speech_segments(9);
    }

    #[test]
    fn do_not_adapt_to_short_speech_segments_threshold_17() {
        do_not_adapt_to_short_speech_segments(17);
    }

    #[test]
    fn adapt_to_enough_speech_segments_threshold_2() {
        adapt_to_enough_speech_segments(2);
    }

    #[test]
    fn adapt_to_enough_speech_segments_threshold_9() {
        adapt_to_enough_speech_segments(9);
    }

    #[test]
    fn adapt_to_enough_speech_segments_threshold_17() {
        adapt_to_enough_speech_segments(17);
    }
}