1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
//! AGC2 common constants and audio utility functions.
//!
//! Ported from `webrtc/modules/audio_processing/agc2/agc2_common.h`
//! and `webrtc/common_audio/include/audio_util.h`.
/// Smallest value a float-encoded S16 sample can take (`-32768.0`).
pub const MIN_FLOAT_S16_VALUE: f32 = -32768.0;
/// Largest value a float-encoded S16 sample can take (`32767.0`).
pub const MAX_FLOAT_S16_VALUE: f32 = 32767.0;
/// Largest absolute magnitude of a float-encoded S16 sample (`32768.0`),
/// i.e. the magnitude of `MIN_FLOAT_S16_VALUE`.
pub const MAX_ABS_FLOAT_S16_VALUE: f32 = 32768.0;
/// Minimum audio level in dBFS scale for S16 samples.
///
/// Derived from `20 * log10(1 / 32768)`; the literal below is that value
/// rounded to three decimal places.
pub const MIN_LEVEL_DBFS: f32 = -90.309;
/// Duration of one audio frame in milliseconds.
pub const FRAME_DURATION_MS: i32 = 10;
/// Number of sub-frames per frame (used for gain interpolation).
///
/// With `FRAME_DURATION_MS` = 10 this works out to 0.5 ms per sub-frame.
pub const SUB_FRAMES_IN_FRAME: i32 = 20;
/// Maximum number of samples per channel in a single frame:
/// 48 kHz * 10 ms = 480 samples.
pub const MAXIMAL_NUMBER_OF_SAMPLES_PER_CHANNEL: usize = 480;
// Adaptive digital gain applier settings.
/// Limiter level, in dBFS, from which the adaptive digital gain starts being
/// decreased.
pub const LIMITER_THRESHOLD_FOR_AGC_GAIN_DBFS: f32 = -1.0;
/// Interval, in milliseconds, at which the VAD is periodically reset.
pub const VAD_RESET_PERIOD_MS: i32 = 1500;
/// Speech probability threshold used to detect speech activity.
pub const VAD_CONFIDENCE_THRESHOLD: f32 = 0.95;
/// Minimum number of adjacent speech frames having a sufficiently high speech
/// probability to reliably detect speech activity.
///
/// With `FRAME_DURATION_MS` = 10, 12 frames correspond to 120 ms of audio.
pub const ADJACENT_SPEECH_FRAMES_THRESHOLD: i32 = 12;
/// Number of milliseconds of speech frames to observe to make the estimator
/// confident.
pub const LEVEL_ESTIMATOR_TIME_TO_CONFIDENCE_MS: f32 = 400.0;
/// Exponential decay factor for the speech level estimator.
///
/// Computed as `1 - 1 / LEVEL_ESTIMATOR_TIME_TO_CONFIDENCE_MS`, which is
/// `1 - 1 / 400 = 0.9975` for the value defined in this file.
pub const LEVEL_ESTIMATOR_LEAK_FACTOR: f32 = 1.0 - 1.0 / LEVEL_ESTIMATOR_TIME_TO_CONFIDENCE_MS;
/// Initial headroom in dB for the saturation protector.
pub const SATURATION_PROTECTOR_INITIAL_HEADROOM_DB: f32 = 20.0;
/// Ring buffer size for the saturation protector, counted in 10 ms frames
/// (4 frames, i.e. 40 ms).
pub const SATURATION_PROTECTOR_BUFFER_SIZE: usize = 4;
/// Number of interpolation points in the knee region of the limiter gain curve.
///
/// The point counts have been tuned to limit the interpolated gain curve error
/// given the limiter parameters, allowing a maximum error of +/- 32768^-1.
pub const INTERPOLATED_GAIN_CURVE_KNEE_POINTS: usize = 22;
/// Number of interpolation points beyond the knee region.
pub const INTERPOLATED_GAIN_CURVE_BEYOND_KNEE_POINTS: usize = 10;
/// Total number of interpolation points in the limiter gain curve: the knee
/// points plus the beyond-knee points (`22 + 10 = 32`).
pub const INTERPOLATED_GAIN_CURVE_TOTAL_POINTS: usize =
INTERPOLATED_GAIN_CURVE_KNEE_POINTS + INTERPOLATED_GAIN_CURVE_BEYOND_KNEE_POINTS;
// Limiter parameters. These are declared as `f64`, unlike the `f32` constants
// defined earlier in this file.
/// Maximum input level in dBFS for the limiter (`1.0`, i.e. a positive dBFS
/// value).
pub const LIMITER_MAX_INPUT_LEVEL_DB_FS: f64 = 1.0;
/// Smoothness of the limiter knee transition in dB.
pub const LIMITER_KNEE_SMOOTHNESS_DB: f64 = 1.0;
/// Compression ratio applied above the limiter knee.
pub const LIMITER_COMPRESSION_RATIO: f64 = 5.0;
// Audio utility functions ported from common_audio/include/audio_util.h.
/// Converts a dB value to a linear ratio: `10^(v/20)`.
/// Converts a dBFS value to a float S16 linear value.
/// Converts a float S16 linear value to dBFS.
/// Converts a dBFS value to a float S16 linear value (f64 version).
/// Converts a float S16 linear value to dBFS (f64 version).