// webrtc_audio_processing_config/lib.rs

//! This crate provides config structs for `webrtc-audio-processing` without any FFI and with only
//! minimal dependencies. Handy when you want to configure it from e.g. a WASM project.

#![warn(clippy::all)]
#![warn(missing_docs)]

#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};

10/// The parameters and behavior of the audio processing module are controlled
11/// by changing the default values in this [`Config`] struct.
12/// The config is applied by passing the struct to the
13/// [`Processor::set_config()`](webrtc-audio-processing::Processor::set_config()) method.
14#[derive(Debug, Default, Copy, Clone, PartialEq)]
15#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
16#[cfg_attr(feature = "serde", derive(Serialize, Deserialize), serde(default))]
17pub struct Config {
18    /// Sets the properties of the audio processing pipeline.
19    pub pipeline: Pipeline,
20
21    /// Enables and configures capture-side pre-amplifier/capture-level adjustment.
22    pub capture_amplifier: Option<CaptureAmplifier>,
23
24    /// Enables and configures high pass filter. Strongly recommended if echo cancellation is
25    /// enabled. Enabling AECM or noise suppression force-enables high pass filter.
26    pub high_pass_filter: Option<HighPassFilter>,
27
28    /// Enables and configures acoustic echo cancellation.
29    pub echo_canceller: Option<EchoCanceller>,
30
31    /// Enables and configures background noise suppression. Force-enables high pass filtering.
32    pub noise_suppression: Option<NoiseSuppression>,
33
34    /// Enables and configures automatic gain control (v1 or v2).
35    pub gain_controller: Option<GainController>,
36}
37
38/// Sets the properties of the audio processing pipeline.
39#[derive(Debug, Default, Copy, Clone, PartialEq)]
40#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
41#[cfg_attr(feature = "serde", derive(Serialize, Deserialize), serde(default))]
42pub struct Pipeline {
43    /// Maximum allowed processing rate used internally.
44    pub maximum_internal_processing_rate: PipelineProcessingRate,
45
46    /// Allow multi-channel processing of render audio.
47    pub multi_channel_render: bool,
48
49    /// Allow multi-channel processing of capture audio when AEC3 is active
50    /// or a custom AEC is injected.
51    pub multi_channel_capture: bool,
52
53    /// Indicates how to downmix multi-channel capture audio to mono (when
54    /// needed).
55    pub capture_downmix_method: DownmixMethod,
56}
57
/// Internal processing rate.
///
/// The enum discriminants carry the rate limit in Hz.
#[derive(Debug, Copy, Clone, Default, PartialEq, Eq)]
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "strum", derive(strum::Display, strum::EnumIter))]
pub enum PipelineProcessingRate {
    /// Limit the rate to 32k Hz.
    #[cfg_attr(feature = "strum", strum(serialize = "32 kHz"))]
    Max32000Hz = 32_000,

    /// Limit the rate to 48k Hz. This is the default.
    #[default]
    #[cfg_attr(feature = "strum", strum(serialize = "48 kHz"))]
    Max48000Hz = 48_000,
}

/// Downmix method for multi-channel capture audio.
#[derive(Debug, Copy, Default, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "strum", derive(strum::Display, strum::EnumIter))]
pub enum DownmixMethod {
    /// Mix by averaging. This is the default.
    #[default]
    Average,
    /// Mix by selecting the first channel.
    #[cfg_attr(feature = "strum", strum(serialize = "Use first channel"))]
    UseFirstChannel,
}

88/// A choice of capture-side pre-amplification/volume adjustment.
89#[derive(Debug, Copy, Clone, PartialEq)]
90#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
91#[cfg_attr(feature = "serde", derive(Serialize, Deserialize), serde(tag = "type"))]
92#[cfg_attr(feature = "strum", derive(strum::Display, strum::EnumIter))]
93pub enum CaptureAmplifier {
94    /// Use the legacy PreAmplifier.
95    #[cfg_attr(feature = "strum", strum(serialize = "Pre-amplifier"))]
96    PreAmplifier(PreAmplifier),
97    /// Use the new CaptureLevelAdjustment.
98    #[cfg_attr(feature = "strum", strum(serialize = "Capture level adjustment"))]
99    CaptureLevelAdjustment(CaptureLevelAdjustment),
100}
101
/// The `PreAmplifier` amplifies the capture signal before any other processing is done.
/// TODO(webrtc:5298): Will be deprecated to use the pre-gain functionality
/// in capture_level_adjustment instead.
#[derive(Debug, Copy, Clone, PartialEq)]
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize), serde(default))]
pub struct PreAmplifier {
    /// Fixed linear gain multiplier. The default is 1.0 (no effect).
    pub fixed_gain_factor: f32,
}

impl Default for PreAmplifier {
    /// Returns a pre-amplifier with a gain factor of 1.0.
    fn default() -> Self {
        Self { fixed_gain_factor: 1.0 }
    }
}

119/// Functionality for general level adjustment in the capture pipeline. This
120/// should not be used together with the legacy PreAmplifier functionality.
121#[derive(Debug, Copy, Clone, PartialEq)]
122#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
123#[cfg_attr(feature = "serde", derive(Serialize, Deserialize), serde(default))]
124pub struct CaptureLevelAdjustment {
125    /// The `pre_gain_factor` scales the signal before any processing is done.
126    pub pre_gain_factor: f32,
127
128    /// The `post_gain_factor` scales the signal after all processing is done.
129    pub post_gain_factor: f32,
130
131    /// Analog mic gain emulation.
132    pub analog_mic_gain_emulation: Option<AnalogMicGainEmulation>,
133}
134
135impl Default for CaptureLevelAdjustment {
136    fn default() -> Self {
137        Self { pre_gain_factor: 1.0, post_gain_factor: 1.0, analog_mic_gain_emulation: None }
138    }
139}
140
/// Analog mic gain emulation for capture level adjustment.
#[derive(Debug, Copy, Clone, PartialEq)]
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize), serde(default))]
pub struct AnalogMicGainEmulation {
    /// Initial analog gain level to use for the emulated analog gain. Must
    /// be in the range [0...255]. Defaults to 255.
    pub initial_level: u8,
}

impl Default for AnalogMicGainEmulation {
    /// Returns an emulation config with an initial level of 255.
    fn default() -> Self {
        Self { initial_level: 255 }
    }
}

/// HPF (high-pass filter) configuration.
#[derive(Debug, Copy, Clone, PartialEq)]
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize), serde(default))]
pub struct HighPassFilter {
    /// Whether or not HPF should be applied in the full-band (i.e. 20 – 20,000 Hz).
    /// Defaults to `true`.
    pub apply_in_full_band: bool,
}

impl Default for HighPassFilter {
    /// Returns a filter config that applies the HPF in the full band.
    fn default() -> Self {
        Self { apply_in_full_band: true }
    }
}

/// AEC (acoustic echo cancellation) configuration.
/// Defaults to Full (AEC3) mode with delay estimation (stream_delay unset).
///
/// Functionality in the C++ library that we don't yet expose:
/// - EchoCanceller::enforce_high_pass_filtering: hard-coded to true on Full, false on Mobile
#[derive(Debug, Copy, Clone, PartialEq)]
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize), serde(tag = "type"))]
#[cfg_attr(feature = "strum", derive(strum::Display, strum::EnumIter))]
pub enum EchoCanceller {
    /// Use low-complexity AEC implementation that is optimized for mobile.
    /// Force-enables high pass filter.
    #[cfg_attr(feature = "strum", strum(serialize = "Mobile (AECM)"))]
    Mobile {
        /// Set the delay in ms between process_render_frame() and process_capture_frame().
        /// Mandatory for the Mobile echo canceller variant.
        stream_delay_ms: u16,
    },

    /// Uses the full AEC3 implementation.
    #[cfg_attr(feature = "strum", strum(serialize = "Full (AEC3)"))]
    Full {
        /// Set the delay in ms between process_render_frame() and process_capture_frame().
        /// If None, we let the AEC processor try determining it.
        stream_delay_ms: Option<u16>,
    },
}

impl Default for EchoCanceller {
    /// Returns the `Full` variant with `stream_delay_ms` unset.
    fn default() -> Self {
        Self::Full { stream_delay_ms: None }
    }
}

206/// Enables background noise suppression.
207#[derive(Debug, Copy, Clone, PartialEq)]
208#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
209#[cfg_attr(feature = "serde", derive(Serialize, Deserialize), serde(default))]
210pub struct NoiseSuppression {
211    /// Determines the aggressiveness of the suppression. Increasing the level will reduce the
212    /// noise level at the expense of a higher speech distortion.
213    pub level: NoiseSuppressionLevel,
214
215    /// Analyze the output of the linear AEC instead of the capture frame.
216    ///
217    /// Only has effect if:
218    /// - echo cancellation is enabled and of Full (AEC3) type
219    /// - experimental AEC3 config was passed with `filter.export_linear_aec_output` = true.
220    pub analyze_linear_aec_output: bool,
221}
222
223impl Default for NoiseSuppression {
224    fn default() -> Self {
225        Self { level: NoiseSuppressionLevel::Moderate, analyze_linear_aec_output: false }
226    }
227}
228
/// Noise suppression level.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "strum", derive(strum::Display, strum::EnumIter))]
pub enum NoiseSuppressionLevel {
    /// Lower suppression level.
    Low,
    /// Moderate suppression level.
    Moderate,
    /// Higher suppression level.
    High,
    /// Even higher suppression level.
    #[cfg_attr(feature = "strum", strum(serialize = "Very High"))]
    VeryHigh,
}

246/// A choice of the gain controller implementation.
247#[derive(Debug, Copy, Clone, PartialEq)]
248#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
249#[cfg_attr(feature = "serde", derive(Serialize, Deserialize), serde(tag = "type"))]
250#[cfg_attr(feature = "strum", derive(strum::Display, strum::EnumIter))]
251pub enum GainController {
252    /// Legacy gain controller 1.
253    #[cfg_attr(feature = "strum", strum(serialize = "Gain Controller 1"))]
254    GainController1(GainController1),
255    /// New gain controller 2.
256    #[cfg_attr(feature = "strum", strum(serialize = "Gain Controller 2"))]
257    GainController2(GainController2),
258}
259
260/// Enables automatic gain control (AGC) functionality.
261/// The automatic gain control (AGC) component brings the signal to an
262/// appropriate range. This is done by applying a digital gain directly and,
263/// in the analog mode, prescribing an analog gain to be applied at the audio
264/// HAL.
265/// Recommended to be enabled on the client-side.
266#[derive(Debug, Copy, Clone, PartialEq)]
267#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
268#[cfg_attr(feature = "serde", derive(Serialize, Deserialize), serde(default))]
269pub struct GainController1 {
270    /// AGC mode.
271    pub mode: GainControllerMode,
272
273    /// Sets the target peak level (or envelope) of the AGC in dBFs (decibels
274    /// from digital full-scale). The convention is to use positive values. For
275    /// instance, passing in a value of 3 corresponds to -3 dBFs, or a target
276    /// level 3 dB below full-scale. Limited to [0, 31].
277    pub target_level_dbfs: u8,
278
279    /// Sets the maximum gain the digital compression stage may apply, in dB. A
280    /// higher number corresponds to greater compression, while a value of 0
281    /// will leave the signal uncompressed. Limited to [0, 90].
282    ///
283    /// For updates after APM setup, the C++ upstream suggests using RuntimeSetting
284    /// instead (which is not yet exposed in the Rust wrapper).
285    pub compression_gain_db: u8,
286
287    /// When enabled, the compression stage will hard limit the signal to the
288    /// target level. Otherwise, the signal will be compressed but not limited
289    /// above the target level.
290    pub enable_limiter: bool,
291
292    /// Analog gain controller configuration.
293    pub analog_gain_controller: Option<AnalogGainController>,
294}
295
296impl Default for GainController1 {
297    fn default() -> Self {
298        Self {
299            mode: GainControllerMode::AdaptiveAnalog,
300            target_level_dbfs: 3,
301            compression_gain_db: 9,
302            enable_limiter: true,
303            analog_gain_controller: None,
304        }
305    }
306}
307
/// Gain control mode.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "strum", derive(strum::Display, strum::EnumIter))]
pub enum GainControllerMode {
    /// Adaptive mode intended for use if an analog volume control is
    /// available on the capture device. It will require the user to provide
    /// coupling between the OS mixer controls and AGC through the
    /// stream_analog_level() functions.
    /// It consists of an analog gain prescription for the audio device and a
    /// digital compression stage.
    #[cfg_attr(feature = "strum", strum(serialize = "Adaptive Analog"))]
    AdaptiveAnalog,
    /// Adaptive mode intended for situations in which an analog volume
    /// control is unavailable. It operates in a similar fashion to the
    /// adaptive analog mode, but with scaling instead applied in the digital
    /// domain. As with the analog mode, it additionally uses a digital
    /// compression stage.
    #[cfg_attr(feature = "strum", strum(serialize = "Adaptive Digital"))]
    AdaptiveDigital,
    /// Fixed mode which enables only the digital compression stage also used
    /// by the two adaptive modes.
    /// It is distinguished from the adaptive modes by considering only a
    /// short time-window of the input signal. It applies a fixed gain
    /// through most of the input level range, and compresses (gradually
    /// reduces gain with increasing level) the input signal at higher
    /// levels. This mode is preferred on embedded devices where the capture
    /// signal level is predictable, so that a known gain can be applied.
    #[cfg_attr(feature = "strum", strum(serialize = "Fixed Digital"))]
    FixedDigital,
}

341/// Enables the analog gain controller functionality.
342#[derive(Debug, Copy, Clone, PartialEq)]
343#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
344#[cfg_attr(feature = "serde", derive(Serialize, Deserialize), serde(default))]
345pub struct AnalogGainController {
346    /// TODO(bugs.webrtc.org/7494): Will be deprecated.
347    pub startup_min_volume: i32,
348    /// Lowest analog microphone level that will be applied in response to
349    /// clipping.
350    pub clipped_level_min: i32,
351    /// If true, an adaptive digital gain is applied.
352    pub enable_digital_adaptive: bool,
353    /// Amount the microphone level is lowered with every clipping event.
354    /// Limited to (0, 255].
355    pub clipped_level_step: i32,
356    /// Proportion of clipped samples required to declare a clipping event.
357    /// Limited to (0.f, 1.f).
358    pub clipped_ratio_threshold: f32,
359    /// Time in frames to wait after a clipping event before checking again.
360    /// Limited to values higher than 0.
361    pub clipped_wait_frames: i32,
362    /// Clipping predictor.
363    pub clipping_predictor: Option<ClippingPredictor>,
364}
365
366impl Default for AnalogGainController {
367    fn default() -> Self {
368        Self {
369            startup_min_volume: 0,
370            clipped_level_min: 70,
371            enable_digital_adaptive: true,
372            clipped_level_step: 15,
373            clipped_ratio_threshold: 0.1,
374            clipped_wait_frames: 300,
375            clipping_predictor: None,
376        }
377    }
378}
379
380/// Enables clipping prediction functionality.
381#[derive(Debug, Copy, Clone, PartialEq)]
382#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
383#[cfg_attr(feature = "serde", derive(Serialize, Deserialize), serde(default))]
384pub struct ClippingPredictor {
385    /// Mode.
386    pub mode: ClippingPredictorMode,
387    /// Number of frames in the sliding analysis window.
388    pub window_length: i32,
389    /// Number of frames in the sliding reference window.
390    pub reference_window_length: i32,
391    /// Reference window delay (unit: number of frames).
392    pub reference_window_delay: i32,
393    /// Clipping prediction threshold (dBFS).
394    pub clipping_threshold: f32,
395    /// Crest factor drop threshold (dB).
396    pub crest_factor_margin: f32,
397    /// If true, the recommended clipped level step is used to modify the
398    /// analog gain. Otherwise, the predictor runs without affecting the
399    /// analog gain.
400    pub use_predicted_step: bool,
401}
402
403impl Default for ClippingPredictor {
404    fn default() -> Self {
405        Self {
406            mode: ClippingPredictorMode::ClippingEventPrediction,
407            window_length: 5,
408            reference_window_length: 5,
409            reference_window_delay: 5,
410            clipping_threshold: -1.0,
411            crest_factor_margin: 3.0,
412            use_predicted_step: true,
413        }
414    }
415}
416
/// Clipping predictor mode.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "strum", derive(strum::Display, strum::EnumIter))]
pub enum ClippingPredictorMode {
    /// Clipping event prediction mode with fixed step estimation.
    #[cfg_attr(feature = "strum", strum(serialize = "Clipping Event Prediction"))]
    ClippingEventPrediction,
    /// Clipped peak estimation mode with adaptive step estimation.
    #[cfg_attr(feature = "strum", strum(serialize = "Adaptive Step Clipping Peak Prediction"))]
    AdaptiveStepClippingPeakPrediction,
    /// Clipped peak estimation mode with fixed step estimation.
    #[cfg_attr(feature = "strum", strum(serialize = "Fixed Step Clipping Peak Prediction"))]
    FixedStepClippingPeakPrediction,
}

434/// Parameters for AGC2, an Automatic Gain Control (AGC) sub-module which
435/// replaces the AGC sub-module parameterized by `gain_controller1`.
436/// AGC2 brings the captured audio signal to the desired level by combining
437/// three different controllers (namely, input volume controller, adaptive
438/// digital controller and fixed digital controller) and a limiter.
439#[derive(Debug, Copy, Default, Clone, PartialEq)]
440#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
441#[cfg_attr(feature = "serde", derive(Serialize, Deserialize), serde(default))]
442pub struct GainController2 {
443    /// Enables the input volume controller, which adjusts the input
444    /// volume applied when the audio is captured (e.g., microphone volume on
445    /// a soundcard, input volume on HAL).
446    pub input_volume_controller_enabled: bool,
447    /// Parameters for the adaptive digital controller, which adjusts and
448    /// applies a digital gain after echo cancellation and after noise
449    /// suppression.
450    pub adaptive_digital: Option<AdaptiveDigital>,
451    /// Parameters for the fixed digital controller, which applies a fixed
452    /// digital gain after the adaptive digital controller and before the
453    /// limiter.
454    pub fixed_digital: FixedDigital,
455}
456
/// Parameters for the adaptive digital controller, which adjusts and
/// applies a digital gain after echo cancellation and after noise
/// suppression.
#[derive(Debug, Copy, Clone, PartialEq)]
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize), serde(default))]
pub struct AdaptiveDigital {
    /// Headroom (dB). Defaults to 5.0.
    pub headroom_db: f32,
    /// Max gain (dB). Defaults to 50.0.
    pub max_gain_db: f32,
    /// Initial gain (dB). Defaults to 15.0.
    pub initial_gain_db: f32,
    /// Max gain change speed (dB/s). Defaults to 6.0.
    pub max_gain_change_db_per_second: f32,
    /// Max output noise level (dBFS). Defaults to -50.0.
    pub max_output_noise_level_dbfs: f32,
}

impl Default for AdaptiveDigital {
    /// Returns the default adaptive digital controller parameters.
    fn default() -> Self {
        Self {
            headroom_db: 5.0,
            max_gain_db: 50.0,
            initial_gain_db: 15.0,
            max_gain_change_db_per_second: 6.0,
            max_output_noise_level_dbfs: -50.0,
        }
    }
}

/// Parameters for the fixed digital controller, which applies a fixed
/// digital gain after the adaptive digital controller and before the
/// limiter.
// `Default` is derived: `f32::default()` is 0.0, the same value the previous
// hand-written impl returned.
#[derive(Debug, Copy, Clone, Default, PartialEq)]
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize), serde(default))]
pub struct FixedDigital {
    /// By setting `gain_db` to a value greater than zero, the limiter can be
    /// turned into a compressor that first applies a fixed gain.
    /// Defaults to 0.0.
    pub gain_db: f32,
}
// End of webrtc_audio_processing_config/lib.rs