// webrtc_audio_processing_config/lib.rs

//! This crate provides config structs for `webrtc-audio-processing` without any FFI and with only
//! minimal dependencies. Handy when you want to configure it from e.g. a WASM project.
3
4#![warn(clippy::all)]
5#![warn(missing_docs)]
6
7#[cfg(feature = "serde")]
8use serde::{Deserialize, Serialize};
9
10/// The parameters and behavior of the audio processing module are controlled
11/// by changing the default values in this [`Config`] struct.
12/// The config is applied by passing the struct to the
13/// [`Processor::set_config()`](webrtc-audio-processing::Processor::set_config()) method.
14#[derive(Debug, Default, Copy, Clone, PartialEq)]
15#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
16#[cfg_attr(feature = "serde", derive(Serialize, Deserialize), serde(default))]
17pub struct Config {
18    /// Sets the properties of the audio processing pipeline.
19    pub pipeline: Pipeline,
20
21    /// Enables and configures capture-side pre-amplifier/capture-level adjustment.
22    pub capture_amplifier: Option<CaptureAmplifier>,
23
24    /// Enables and configures high pass filter. Strongly recommended if echo cancellation is
25    /// enabled. Enabling AECM or noise suppression force-enables high pass filter.
26    pub high_pass_filter: Option<HighPassFilter>,
27
28    /// Enables and configures acoustic echo cancellation.
29    pub echo_canceller: Option<EchoCanceller>,
30
31    /// Enables and configures background noise suppression. Force-enables high pass filtering.
32    pub noise_suppression: Option<NoiseSuppression>,
33
34    /// Enables and configures automatic gain control (v1 or v2).
35    pub gain_controller: Option<GainController>,
36}
37
38/// Sets the properties of the audio processing pipeline.
39#[derive(Debug, Default, Copy, Clone, PartialEq)]
40#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
41#[cfg_attr(feature = "serde", derive(Serialize, Deserialize), serde(default))]
42pub struct Pipeline {
43    /// Maximum allowed processing rate used internally.
44    pub maximum_internal_processing_rate: PipelineProcessingRate,
45
46    /// Allow multi-channel processing of render audio.
47    pub multi_channel_render: bool,
48
49    /// Allow multi-channel processing of capture audio when AEC3 is active
50    /// or a custom AEC is injected.
51    pub multi_channel_capture: bool,
52
53    /// Indicates how to downmix multi-channel capture audio to mono (when
54    /// needed).
55    pub capture_downmix_method: DownmixMethod,
56}
57
/// Internal processing rate.
///
/// Each variant's discriminant is the rate limit in Hz. The default is
/// [`PipelineProcessingRate::Max48000Hz`].
#[derive(Debug, Copy, Clone, Default, PartialEq, Eq)]
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "strum", derive(strum::Display, strum::EnumIter))]
pub enum PipelineProcessingRate {
    /// Limit the rate to 32 kHz.
    #[cfg_attr(feature = "strum", strum(serialize = "32 kHz"))]
    Max32000Hz = 32_000,

    /// Limit the rate to 48 kHz.
    #[default]
    #[cfg_attr(feature = "strum", strum(serialize = "48 kHz"))]
    Max48000Hz = 48_000,
}
73
/// How multi-channel capture audio is downmixed.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "strum", derive(strum::Display, strum::EnumIter))]
pub enum DownmixMethod {
    /// Downmix by averaging the channels. This is the default.
    #[default]
    Average,

    /// Downmix by selecting the first channel.
    #[cfg_attr(feature = "strum", strum(serialize = "Use first channel"))]
    UseFirstChannel,
}
87
88/// A choice of capture-side pre-amplification/volume adjustment.
89#[derive(Debug, Copy, Clone, PartialEq)]
90#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
91#[cfg_attr(feature = "serde", derive(Serialize, Deserialize), serde(tag = "type"))]
92#[cfg_attr(feature = "strum", derive(strum::Display, strum::EnumIter))]
93pub enum CaptureAmplifier {
94    /// Use the legacy PreAmplifier.
95    #[cfg_attr(feature = "strum", strum(serialize = "Pre-amplifier"))]
96    PreAmplifier(PreAmplifier),
97    /// Use the new CaptureLevelAdjustment.
98    #[cfg_attr(feature = "strum", strum(serialize = "Capture level adjustment"))]
99    CaptureLevelAdjustment(CaptureLevelAdjustment),
100}
101
/// Amplifies the capture signal before any other processing is done.
///
/// TODO(webrtc:5298): Will be deprecated to use the pre-gain functionality
/// in capture_level_adjustment instead.
#[derive(Clone, Copy, Debug, PartialEq)]
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize), serde(default))]
pub struct PreAmplifier {
    /// Fixed linear gain multiplier. Defaults to 1.0, which has no effect.
    pub fixed_gain_factor: f32,
}

impl Default for PreAmplifier {
    fn default() -> Self {
        // A gain factor of 1.0 leaves the signal untouched.
        let fixed_gain_factor = 1.0;
        PreAmplifier { fixed_gain_factor }
    }
}
118
119/// Functionality for general level adjustment in the capture pipeline. This
120/// should not be used together with the legacy PreAmplifier functionality.
121#[derive(Debug, Copy, Clone, PartialEq)]
122#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
123#[cfg_attr(feature = "serde", derive(Serialize, Deserialize), serde(default))]
124pub struct CaptureLevelAdjustment {
125    /// The `pre_gain_factor` scales the signal before any processing is done.
126    pub pre_gain_factor: f32,
127
128    /// The `post_gain_factor` scales the signal after all processing is done.
129    pub post_gain_factor: f32,
130
131    /// Analog mic gain emulation.
132    pub analog_mic_gain_emulation: Option<AnalogMicGainEmulation>,
133}
134
135impl Default for CaptureLevelAdjustment {
136    fn default() -> Self {
137        Self { pre_gain_factor: 1.0, post_gain_factor: 1.0, analog_mic_gain_emulation: None }
138    }
139}
140
/// Emulation of an analog microphone gain, used by capture level adjustment.
#[derive(Clone, Copy, Debug, PartialEq)]
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize), serde(default))]
pub struct AnalogMicGainEmulation {
    /// Initial analog gain level to use for the emulated analog gain. Must
    /// be in the range [0...255]. Defaults to 255.
    pub initial_level: u8,
}

impl Default for AnalogMicGainEmulation {
    fn default() -> Self {
        let initial_level = 255;
        AnalogMicGainEmulation { initial_level }
    }
}
156
/// Configuration of the high-pass filter (HPF).
#[derive(Clone, Copy, Debug, PartialEq)]
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize), serde(default))]
pub struct HighPassFilter {
    /// Whether or not HPF should be applied in the full-band (i.e. 20 – 20,000 Hz).
    /// Defaults to `true`.
    pub apply_in_full_band: bool,
}

impl Default for HighPassFilter {
    fn default() -> Self {
        let apply_in_full_band = true;
        HighPassFilter { apply_in_full_band }
    }
}
171
/// Configuration of acoustic echo cancellation (AEC).
///
/// The default is the Full (AEC3) mode with delay estimation (stream_delay unset).
///
/// Functionality in the C++ library that we don't yet expose:
/// - EchoCanceller::enforce_high_pass_filtering: hard-coded to true on Full, false on Mobile
#[derive(Clone, Copy, Debug, PartialEq)]
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize), serde(tag = "type"))]
#[cfg_attr(feature = "strum", derive(strum::Display, strum::EnumIter))]
pub enum EchoCanceller {
    /// Low-complexity AEC implementation that is optimized for mobile.
    /// Force-enables the high pass filter.
    #[cfg_attr(feature = "strum", strum(serialize = "Mobile (AECM)"))]
    Mobile {
        /// Delay in ms between process_render_frame() and process_capture_frame().
        /// Mandatory for the Mobile echo canceller variant.
        stream_delay_ms: u16,
    },

    /// The full AEC3 implementation.
    #[cfg_attr(feature = "strum", strum(serialize = "Full (AEC3)"))]
    Full {
        /// Delay in ms between process_render_frame() and process_capture_frame().
        /// When `None`, the AEC processor is left to try determining it.
        stream_delay_ms: Option<u16>,
    },
}

impl Default for EchoCanceller {
    fn default() -> Self {
        // Full (AEC3) with the stream delay left unset.
        let stream_delay_ms = None;
        EchoCanceller::Full { stream_delay_ms }
    }
}
205
206/// Enables background noise suppression.
207#[derive(Debug, Copy, Clone, PartialEq)]
208#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
209#[cfg_attr(feature = "serde", derive(Serialize, Deserialize), serde(default))]
210pub struct NoiseSuppression {
211    /// Determines the aggressiveness of the suppression. Increasing the level will reduce the
212    /// noise level at the expense of a higher speech distortion.
213    pub level: NoiseSuppressionLevel,
214
215    /// Analyze the output of the linear AEC instead of the capture frame.
216    /// Activates the `export_linear_aec_output` flag of the echo canceller.
217    /// Has no effect if echo cancellation is not enabled or is of the Mobile AECM type.
218    pub analyze_linear_aec_output: bool,
219}
220
221impl Default for NoiseSuppression {
222    fn default() -> Self {
223        Self { level: NoiseSuppressionLevel::Moderate, analyze_linear_aec_output: false }
224    }
225}
226
/// Noise suppression level.
///
/// The default is [`NoiseSuppressionLevel::Moderate`], the same level that
/// `NoiseSuppression::default()` selects.
#[derive(Debug, Copy, Clone, Default, PartialEq, Eq)]
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "strum", derive(strum::Display, strum::EnumIter))]
pub enum NoiseSuppressionLevel {
    /// Lower suppression level.
    Low,
    /// Moderate suppression level.
    #[default]
    Moderate,
    /// Higher suppression level.
    High,
    /// Even higher suppression level.
    #[cfg_attr(feature = "strum", strum(serialize = "Very High"))]
    VeryHigh,
}
243
244/// A choice of the gain controller implementation.
245#[derive(Debug, Copy, Clone, PartialEq)]
246#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
247#[cfg_attr(feature = "serde", derive(Serialize, Deserialize), serde(tag = "type"))]
248#[cfg_attr(feature = "strum", derive(strum::Display, strum::EnumIter))]
249pub enum GainController {
250    /// Legacy gain controller 1.
251    #[cfg_attr(feature = "strum", strum(serialize = "Gain Controller 1"))]
252    GainController1(GainController1),
253    /// New gain controller 2.
254    #[cfg_attr(feature = "strum", strum(serialize = "Gain Controller 2"))]
255    GainController2(GainController2),
256}
257
258/// Enables automatic gain control (AGC) functionality.
259/// The automatic gain control (AGC) component brings the signal to an
260/// appropriate range. This is done by applying a digital gain directly and,
261/// in the analog mode, prescribing an analog gain to be applied at the audio
262/// HAL.
263/// Recommended to be enabled on the client-side.
264#[derive(Debug, Copy, Clone, PartialEq)]
265#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
266#[cfg_attr(feature = "serde", derive(Serialize, Deserialize), serde(default))]
267pub struct GainController1 {
268    /// AGC mode.
269    pub mode: GainControllerMode,
270
271    /// Sets the target peak level (or envelope) of the AGC in dBFs (decibels
272    /// from digital full-scale). The convention is to use positive values. For
273    /// instance, passing in a value of 3 corresponds to -3 dBFs, or a target
274    /// level 3 dB below full-scale. Limited to [0, 31].
275    pub target_level_dbfs: u8,
276
277    /// Sets the maximum gain the digital compression stage may apply, in dB. A
278    /// higher number corresponds to greater compression, while a value of 0
279    /// will leave the signal uncompressed. Limited to [0, 90].
280    ///
281    /// For updates after APM setup, the C++ upstream suggests using RuntimeSetting
282    /// instead (which is not yet exposed in the Rust wrapper).
283    pub compression_gain_db: u8,
284
285    /// When enabled, the compression stage will hard limit the signal to the
286    /// target level. Otherwise, the signal will be compressed but not limited
287    /// above the target level.
288    pub enable_limiter: bool,
289
290    /// Analog gain controller configuration.
291    pub analog_gain_controller: Option<AnalogGainController>,
292}
293
294impl Default for GainController1 {
295    fn default() -> Self {
296        Self {
297            mode: GainControllerMode::AdaptiveAnalog,
298            target_level_dbfs: 3,
299            compression_gain_db: 9,
300            enable_limiter: true,
301            analog_gain_controller: None,
302        }
303    }
304}
305
/// Gain control mode.
///
/// The default is [`GainControllerMode::AdaptiveAnalog`], the same mode that
/// `GainController1::default()` selects.
#[derive(Debug, Copy, Clone, Default, PartialEq, Eq)]
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "strum", derive(strum::Display, strum::EnumIter))]
pub enum GainControllerMode {
    /// Adaptive mode intended for use if an analog volume control is
    /// available on the capture device. It will require the user to provide
    /// coupling between the OS mixer controls and AGC through the
    /// stream_analog_level() functions.
    /// It consists of an analog gain prescription for the audio device and a
    /// digital compression stage.
    #[default]
    #[cfg_attr(feature = "strum", strum(serialize = "Adaptive Analog"))]
    AdaptiveAnalog,
    /// Adaptive mode intended for situations in which an analog volume
    /// control is unavailable. It operates in a similar fashion to the
    /// adaptive analog mode, but with scaling instead applied in the digital
    /// domain. As with the analog mode, it additionally uses a digital
    /// compression stage.
    #[cfg_attr(feature = "strum", strum(serialize = "Adaptive Digital"))]
    AdaptiveDigital,
    /// Fixed mode which enables only the digital compression stage also used
    /// by the two adaptive modes.
    /// It is distinguished from the adaptive modes by considering only a
    /// short time-window of the input signal. It applies a fixed gain
    /// through most of the input level range, and compresses (gradually
    /// reduces gain with increasing level) the input signal at higher
    /// levels. This mode is preferred on embedded devices where the capture
    /// signal level is predictable, so that a known gain can be applied.
    #[cfg_attr(feature = "strum", strum(serialize = "Fixed Digital"))]
    FixedDigital,
}
338
339/// Enables the analog gain controller functionality.
340#[derive(Debug, Copy, Clone, PartialEq)]
341#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
342#[cfg_attr(feature = "serde", derive(Serialize, Deserialize), serde(default))]
343pub struct AnalogGainController {
344    /// TODO(bugs.webrtc.org/7494): Will be deprecated.
345    pub startup_min_volume: i32,
346    /// Lowest analog microphone level that will be applied in response to
347    /// clipping.
348    pub clipped_level_min: i32,
349    /// If true, an adaptive digital gain is applied.
350    pub enable_digital_adaptive: bool,
351    /// Amount the microphone level is lowered with every clipping event.
352    /// Limited to (0, 255].
353    pub clipped_level_step: i32,
354    /// Proportion of clipped samples required to declare a clipping event.
355    /// Limited to (0.f, 1.f).
356    pub clipped_ratio_threshold: f32,
357    /// Time in frames to wait after a clipping event before checking again.
358    /// Limited to values higher than 0.
359    pub clipped_wait_frames: i32,
360    /// Clipping predictor.
361    pub clipping_predictor: Option<ClippingPredictor>,
362}
363
364impl Default for AnalogGainController {
365    fn default() -> Self {
366        Self {
367            startup_min_volume: 0,
368            clipped_level_min: 70,
369            enable_digital_adaptive: true,
370            clipped_level_step: 15,
371            clipped_ratio_threshold: 0.1,
372            clipped_wait_frames: 300,
373            clipping_predictor: None,
374        }
375    }
376}
377
378/// Enables clipping prediction functionality.
379#[derive(Debug, Copy, Clone, PartialEq)]
380#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
381#[cfg_attr(feature = "serde", derive(Serialize, Deserialize), serde(default))]
382pub struct ClippingPredictor {
383    /// Mode.
384    pub mode: ClippingPredictorMode,
385    /// Number of frames in the sliding analysis window.
386    pub window_length: i32,
387    /// Number of frames in the sliding reference window.
388    pub reference_window_length: i32,
389    /// Reference window delay (unit: number of frames).
390    pub reference_window_delay: i32,
391    /// Clipping prediction threshold (dBFS).
392    pub clipping_threshold: f32,
393    /// Crest factor drop threshold (dB).
394    pub crest_factor_margin: f32,
395    /// If true, the recommended clipped level step is used to modify the
396    /// analog gain. Otherwise, the predictor runs without affecting the
397    /// analog gain.
398    pub use_predicted_step: bool,
399}
400
401impl Default for ClippingPredictor {
402    fn default() -> Self {
403        Self {
404            mode: ClippingPredictorMode::ClippingEventPrediction,
405            window_length: 5,
406            reference_window_length: 5,
407            reference_window_delay: 5,
408            clipping_threshold: -1.0,
409            crest_factor_margin: 3.0,
410            use_predicted_step: true,
411        }
412    }
413}
414
/// Clipping predictor mode.
///
/// The default is [`ClippingPredictorMode::ClippingEventPrediction`], the same mode that
/// `ClippingPredictor::default()` selects.
#[derive(Debug, Copy, Clone, Default, PartialEq, Eq)]
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "strum", derive(strum::Display, strum::EnumIter))]
pub enum ClippingPredictorMode {
    /// Clipping event prediction mode with fixed step estimation.
    #[default]
    #[cfg_attr(feature = "strum", strum(serialize = "Clipping Event Prediction"))]
    ClippingEventPrediction,
    /// Clipped peak estimation mode with adaptive step estimation.
    #[cfg_attr(feature = "strum", strum(serialize = "Adaptive Step Clipping Peak Prediction"))]
    AdaptiveStepClippingPeakPrediction,
    /// Clipped peak estimation mode with fixed step estimation.
    #[cfg_attr(feature = "strum", strum(serialize = "Fixed Step Clipping Peak Prediction"))]
    FixedStepClippingPeakPrediction,
}
431
432/// Parameters for AGC2, an Automatic Gain Control (AGC) sub-module which
433/// replaces the AGC sub-module parameterized by `gain_controller1`.
434/// AGC2 brings the captured audio signal to the desired level by combining
435/// three different controllers (namely, input volume controller, adaptive
436/// digital controller and fixed digital controller) and a limiter.
437#[derive(Debug, Copy, Default, Clone, PartialEq)]
438#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
439#[cfg_attr(feature = "serde", derive(Serialize, Deserialize), serde(default))]
440pub struct GainController2 {
441    /// Enables the input volume controller, which adjusts the input
442    /// volume applied when the audio is captured (e.g., microphone volume on
443    /// a soundcard, input volume on HAL).
444    pub input_volume_controller_enabled: bool,
445    /// Parameters for the adaptive digital controller, which adjusts and
446    /// applies a digital gain after echo cancellation and after noise
447    /// suppression.
448    pub adaptive_digital: Option<AdaptiveDigital>,
449    /// Parameters for the fixed digital controller, which applies a fixed
450    /// digital gain after the adaptive digital controller and before the
451    /// limiter.
452    pub fixed_digital: FixedDigital,
453}
454
/// Parameters for the adaptive digital controller, which adjusts and applies a
/// digital gain after echo cancellation and after noise suppression.
#[derive(Clone, Copy, Debug, PartialEq)]
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize), serde(default))]
pub struct AdaptiveDigital {
    /// Headroom (dB).
    pub headroom_db: f32,

    /// Max gain (dB).
    pub max_gain_db: f32,

    /// Initial gain (dB).
    pub initial_gain_db: f32,

    /// Max gain change speed (dB/s).
    pub max_gain_change_db_per_second: f32,

    /// Max output noise level (dBFS).
    pub max_output_noise_level_dbfs: f32,
}

impl Default for AdaptiveDigital {
    fn default() -> Self {
        AdaptiveDigital {
            headroom_db: 5.0,
            max_gain_db: 50.0,
            initial_gain_db: 15.0,
            max_gain_change_db_per_second: 6.0,
            max_output_noise_level_dbfs: -50.0,
        }
    }
}
485
/// Parameters for the fixed digital controller, which applies a fixed
/// digital gain after the adaptive digital controller and before the
/// limiter.
///
/// `Default` is derived: `gain_db` defaults to `0.0`, exactly what the previous
/// hand-written implementation returned.
#[derive(Debug, Copy, Clone, Default, PartialEq)]
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize), serde(default))]
pub struct FixedDigital {
    /// By setting `gain_db` to a value greater than zero, the limiter can be
    /// turned into a compressor that first applies a fixed gain.
    pub gain_db: f32,
}