// sonora_aec3/config.rs

//! AEC3 configuration.
//!
//! Ported from `api/audio/echo_canceller3_config.h/cc`.

5/// Configuration for the Echo Canceller 3.
6///
7/// This is a detailed internal configuration with many tuning parameters.
8/// Most users should not need to modify these values — the defaults match
9/// the upstream C++ WebRTC configuration. Use [`validate()`](Self::validate) to
10/// clamp all parameters to reasonable ranges.
11#[derive(Debug, Clone, Default)]
12pub struct EchoCanceller3Config {
13    /// Render buffering and excess detection settings.
14    pub buffering: Buffering,
15    /// Delay estimation and alignment settings.
16    pub delay: Delay,
17    /// Adaptive filter configuration.
18    pub filter: Filter,
19    /// Echo Return Loss Enhancement (ERLE) estimation settings.
20    pub erle: Erle,
21    /// Echo path strength estimation settings.
22    pub ep_strength: EpStrength,
23    /// Echo audibility detection settings.
24    pub echo_audibility: EchoAudibility,
25    /// Render signal power thresholds.
26    pub render_levels: RenderLevels,
27    /// Echo removal control settings.
28    pub echo_removal_control: EchoRemovalControl,
29    /// Echo model parameters.
30    pub echo_model: EchoModel,
31    /// Comfort noise generation settings.
32    pub comfort_noise: ComfortNoise,
33    /// Suppression filter settings.
34    pub suppressor: Suppressor,
35    /// Multi-channel processing settings.
36    pub multi_channel: MultiChannel,
37}
38
39impl EchoCanceller3Config {
40    /// Validates and clamps config parameters to reasonable ranges.
41    /// Returns `true` if no changes were needed.
42    pub fn validate(&mut self) -> bool {
43        let mut ok = true;
44
45        if self.delay.down_sampling_factor != 4 && self.delay.down_sampling_factor != 8 {
46            self.delay.down_sampling_factor = 4;
47            ok = false;
48        }
49
50        ok &= limit_usize(&mut self.delay.default_delay, 0, 5000);
51        ok &= limit_usize(&mut self.delay.num_filters, 0, 5000);
52        ok &= limit_usize(&mut self.delay.delay_headroom_samples, 0, 5000);
53        ok &= limit_usize(&mut self.delay.hysteresis_limit_blocks, 0, 5000);
54        ok &= limit_usize(&mut self.delay.fixed_capture_delay_samples, 0, 5000);
55        ok &= limit_f32(&mut self.delay.delay_estimate_smoothing, 0.0, 1.0);
56        ok &= limit_f32(
57            &mut self.delay.delay_candidate_detection_threshold,
58            0.0,
59            1.0,
60        );
61        ok &= limit_i32(&mut self.delay.delay_selection_thresholds.initial, 1, 250);
62        ok &= limit_i32(&mut self.delay.delay_selection_thresholds.converged, 1, 250);
63
64        ok &= floor_limit_usize(&mut self.filter.refined.length_blocks, 1);
65        ok &= limit_f32(&mut self.filter.refined.leakage_converged, 0.0, 1000.0);
66        ok &= limit_f32(&mut self.filter.refined.leakage_diverged, 0.0, 1000.0);
67        ok &= limit_f32(&mut self.filter.refined.error_floor, 0.0, 1000.0);
68        ok &= limit_f32(&mut self.filter.refined.error_ceil, 0.0, 100_000_000.0);
69        ok &= limit_f32(&mut self.filter.refined.noise_gate, 0.0, 100_000_000.0);
70
71        ok &= floor_limit_usize(&mut self.filter.refined_initial.length_blocks, 1);
72        ok &= limit_f32(
73            &mut self.filter.refined_initial.leakage_converged,
74            0.0,
75            1000.0,
76        );
77        ok &= limit_f32(
78            &mut self.filter.refined_initial.leakage_diverged,
79            0.0,
80            1000.0,
81        );
82        ok &= limit_f32(&mut self.filter.refined_initial.error_floor, 0.0, 1000.0);
83        ok &= limit_f32(
84            &mut self.filter.refined_initial.error_ceil,
85            0.0,
86            100_000_000.0,
87        );
88        ok &= limit_f32(
89            &mut self.filter.refined_initial.noise_gate,
90            0.0,
91            100_000_000.0,
92        );
93
94        if self.filter.refined.length_blocks < self.filter.refined_initial.length_blocks {
95            self.filter.refined_initial.length_blocks = self.filter.refined.length_blocks;
96            ok = false;
97        }
98
99        ok &= floor_limit_usize(&mut self.filter.coarse.length_blocks, 1);
100        ok &= limit_f32(&mut self.filter.coarse.rate, 0.0, 1.0);
101        ok &= limit_f32(&mut self.filter.coarse.noise_gate, 0.0, 100_000_000.0);
102
103        ok &= floor_limit_usize(&mut self.filter.coarse_initial.length_blocks, 1);
104        ok &= limit_f32(&mut self.filter.coarse_initial.rate, 0.0, 1.0);
105        ok &= limit_f32(
106            &mut self.filter.coarse_initial.noise_gate,
107            0.0,
108            100_000_000.0,
109        );
110
111        if self.filter.coarse.length_blocks < self.filter.coarse_initial.length_blocks {
112            self.filter.coarse_initial.length_blocks = self.filter.coarse.length_blocks;
113            ok = false;
114        }
115
116        ok &= limit_usize(&mut self.filter.config_change_duration_blocks, 0, 100_000);
117        ok &= limit_f32(&mut self.filter.initial_state_seconds, 0.0, 100.0);
118        ok &= limit_i32(&mut self.filter.coarse_reset_hangover_blocks, 0, 250_000);
119
120        ok &= limit_f32(&mut self.erle.min, 1.0, 100_000.0);
121        ok &= limit_f32(&mut self.erle.max_l, 1.0, 100_000.0);
122        ok &= limit_f32(&mut self.erle.max_h, 1.0, 100_000.0);
123        if self.erle.min > self.erle.max_l || self.erle.min > self.erle.max_h {
124            self.erle.min = self.erle.max_l.min(self.erle.max_h);
125            ok = false;
126        }
127        ok &= limit_usize(
128            &mut self.erle.num_sections,
129            1,
130            self.filter.refined.length_blocks,
131        );
132
133        ok &= limit_f32(&mut self.ep_strength.default_gain, 0.0, 1_000_000.0);
134        ok &= limit_f32(&mut self.ep_strength.default_len, -1.0, 1.0);
135        ok &= limit_f32(&mut self.ep_strength.nearend_len, -1.0, 1.0);
136
137        let max_power = 32768.0f32 * 32768.0;
138        ok &= limit_f32(&mut self.echo_audibility.low_render_limit, 0.0, max_power);
139        ok &= limit_f32(
140            &mut self.echo_audibility.normal_render_limit,
141            0.0,
142            max_power,
143        );
144        ok &= limit_f32(&mut self.echo_audibility.floor_power, 0.0, max_power);
145        ok &= limit_f32(
146            &mut self.echo_audibility.audibility_threshold_lf,
147            0.0,
148            max_power,
149        );
150        ok &= limit_f32(
151            &mut self.echo_audibility.audibility_threshold_mf,
152            0.0,
153            max_power,
154        );
155        ok &= limit_f32(
156            &mut self.echo_audibility.audibility_threshold_hf,
157            0.0,
158            max_power,
159        );
160
161        ok &= limit_f32(&mut self.render_levels.active_render_limit, 0.0, max_power);
162        ok &= limit_f32(
163            &mut self.render_levels.poor_excitation_render_limit,
164            0.0,
165            max_power,
166        );
167        ok &= limit_f32(
168            &mut self.render_levels.poor_excitation_render_limit_ds8,
169            0.0,
170            max_power,
171        );
172
173        ok &= limit_usize(&mut self.echo_model.noise_floor_hold, 0, 1000);
174        ok &= limit_f32(&mut self.echo_model.min_noise_floor_power, 0.0, 2_000_000.0);
175        ok &= limit_f32(&mut self.echo_model.stationary_gate_slope, 0.0, 1_000_000.0);
176        ok &= limit_f32(&mut self.echo_model.noise_gate_power, 0.0, 1_000_000.0);
177        ok &= limit_f32(&mut self.echo_model.noise_gate_slope, 0.0, 1_000_000.0);
178        ok &= limit_usize(&mut self.echo_model.render_pre_window_size, 0, 100);
179        ok &= limit_usize(&mut self.echo_model.render_post_window_size, 0, 100);
180
181        ok &= limit_f32(&mut self.comfort_noise.noise_floor_dbfs, -200.0, 0.0);
182
183        ok &= limit_usize(&mut self.suppressor.nearend_average_blocks, 1, 5000);
184
185        ok &= validate_tuning(&mut self.suppressor.normal_tuning);
186        ok &= validate_tuning(&mut self.suppressor.nearend_tuning);
187
188        ok &= limit_i32(&mut self.suppressor.last_permanent_lf_smoothing_band, 0, 64);
189        ok &= limit_i32(&mut self.suppressor.last_lf_smoothing_band, 0, 64);
190        ok &= limit_i32(&mut self.suppressor.last_lf_band, 0, 63);
191        ok &= limit_i32(
192            &mut self.suppressor.first_hf_band,
193            self.suppressor.last_lf_band + 1,
194            64,
195        );
196
197        ok &= limit_f32(
198            &mut self.suppressor.dominant_nearend_detection.enr_threshold,
199            0.0,
200            1_000_000.0,
201        );
202        ok &= limit_f32(
203            &mut self.suppressor.dominant_nearend_detection.snr_threshold,
204            0.0,
205            1_000_000.0,
206        );
207        ok &= limit_i32(
208            &mut self.suppressor.dominant_nearend_detection.hold_duration,
209            0,
210            10_000,
211        );
212        ok &= limit_i32(
213            &mut self.suppressor.dominant_nearend_detection.trigger_threshold,
214            0,
215            10_000,
216        );
217
218        ok &= limit_usize(
219            &mut self
220                .suppressor
221                .subband_nearend_detection
222                .nearend_average_blocks,
223            1,
224            1024,
225        );
226        ok &= limit_usize(
227            &mut self.suppressor.subband_nearend_detection.subband1.low,
228            0,
229            65,
230        );
231        ok &= limit_usize(
232            &mut self.suppressor.subband_nearend_detection.subband1.high,
233            self.suppressor.subband_nearend_detection.subband1.low,
234            65,
235        );
236        ok &= limit_usize(
237            &mut self.suppressor.subband_nearend_detection.subband2.low,
238            0,
239            65,
240        );
241        ok &= limit_usize(
242            &mut self.suppressor.subband_nearend_detection.subband2.high,
243            self.suppressor.subband_nearend_detection.subband2.low,
244            65,
245        );
246        ok &= limit_f32(
247            &mut self.suppressor.subband_nearend_detection.nearend_threshold,
248            0.0,
249            1.0e24,
250        );
251        ok &= limit_f32(
252            &mut self.suppressor.subband_nearend_detection.snr_threshold,
253            0.0,
254            1.0e24,
255        );
256
257        ok &= limit_f32(
258            &mut self.suppressor.high_bands_suppression.enr_threshold,
259            0.0,
260            1_000_000.0,
261        );
262        ok &= limit_f32(
263            &mut self.suppressor.high_bands_suppression.max_gain_during_echo,
264            0.0,
265            1.0,
266        );
267        ok &= limit_f32(
268            &mut self
269                .suppressor
270                .high_bands_suppression
271                .anti_howling_activation_threshold,
272            0.0,
273            max_power,
274        );
275        ok &= limit_f32(
276            &mut self.suppressor.high_bands_suppression.anti_howling_gain,
277            0.0,
278            1.0,
279        );
280
281        ok &= limit_i32(
282            &mut self
283                .suppressor
284                .high_frequency_suppression
285                .limiting_gain_band,
286            1,
287            64,
288        );
289        ok &= limit_i32(
290            &mut self
291                .suppressor
292                .high_frequency_suppression
293                .bands_in_limiting_gain,
294            0,
295            64 - self
296                .suppressor
297                .high_frequency_suppression
298                .limiting_gain_band,
299        );
300
301        ok &= limit_f32(&mut self.suppressor.floor_first_increase, 0.0, 1_000_000.0);
302
303        ok
304    }
305
306    /// Creates the default configuration tuned for multichannel.
307    pub fn create_default_multichannel_config() -> Self {
308        let mut cfg = Self::default();
309        cfg.filter.coarse.length_blocks = 11;
310        cfg.filter.coarse.rate = 0.95;
311        cfg.filter.coarse_initial.length_blocks = 11;
312        cfg.filter.coarse_initial.rate = 0.95;
313        cfg.suppressor.normal_tuning.max_dec_factor_lf = 0.35;
314        cfg.suppressor.normal_tuning.max_inc_factor = 1.5;
315        cfg
316    }
317}
318
319fn validate_tuning(t: &mut Tuning) -> bool {
320    let mut ok = true;
321    ok &= limit_f32(&mut t.mask_lf.enr_transparent, 0.0, 100.0);
322    ok &= limit_f32(&mut t.mask_lf.enr_suppress, 0.0, 100.0);
323    ok &= limit_f32(&mut t.mask_lf.emr_transparent, 0.0, 100.0);
324    ok &= limit_f32(&mut t.mask_hf.enr_transparent, 0.0, 100.0);
325    ok &= limit_f32(&mut t.mask_hf.enr_suppress, 0.0, 100.0);
326    ok &= limit_f32(&mut t.mask_hf.emr_transparent, 0.0, 100.0);
327    ok &= limit_f32(&mut t.max_inc_factor, 0.0, 100.0);
328    ok &= limit_f32(&mut t.max_dec_factor_lf, 0.0, 100.0);
329    ok
330}
331
/// Clamps `*value` to `[min, max]` in place.
///
/// A NaN input passes through `clamp` unchanged and is then replaced by
/// `min`; infinities are clamped to the nearest bound like any other
/// out-of-range value.
///
/// Returns `true` if the stored value was left unchanged. A NaN input
/// always yields `false`, since NaN never compares equal to the result.
///
/// # Panics
///
/// Panics if `min > max` or if either bound is NaN (see [`f32::clamp`]).
fn limit_f32(value: &mut f32, min: f32, max: f32) -> bool {
    let original = *value;
    let limited = match original.clamp(min, max) {
        clamped if clamped.is_finite() => clamped,
        _ => min,
    };
    *value = limited;
    original == limited
}
339
/// Clamps `*value` to `[min, max]` in place.
///
/// Returns `true` if the value was already within range (left unchanged).
///
/// # Panics
///
/// Panics if `min > max` (see [`Ord::clamp`]).
fn limit_usize(value: &mut usize, min: usize, max: usize) -> bool {
    let original = *value;
    *value = original.clamp(min, max);
    *value == original
}
346
/// Clamps `*value` to `[min, max]` in place.
///
/// Returns `true` if the value was already within range (left unchanged).
///
/// # Panics
///
/// Panics if `min > max` (see [`Ord::clamp`]).
fn limit_i32(value: &mut i32, min: i32, max: i32) -> bool {
    let original = *value;
    *value = original.clamp(min, max);
    *value == original
}
353
/// Raises `*value` to `min` if it is below it; there is no upper bound.
///
/// Returns `true` if the value was already `>= min` (left unchanged).
fn floor_limit_usize(value: &mut usize, min: usize) -> bool {
    let in_range = *value >= min;
    if !in_range {
        *value = min;
    }
    in_range
}
362
// --- Sub-config structs ---
364
/// Render buffer excess detection settings.
#[derive(Debug, Clone)]
pub struct Buffering {
    /// Interval in blocks between excess render detection checks (default: 250).
    pub excess_render_detection_interval_blocks: usize,
    /// Maximum allowed excess render blocks before triggering correction (default: 8).
    pub max_allowed_excess_render_blocks: usize,
}

impl Default for Buffering {
    /// Returns the default buffering settings (250-block interval, 8 excess blocks).
    fn default() -> Self {
        Self {
            excess_render_detection_interval_blocks: 250,
            max_allowed_excess_render_blocks: 8,
        }
    }
}
382
/// Thresholds for delay estimator convergence detection.
///
/// This type has no `Default` impl of its own; the documented defaults are
/// the values assigned by `Delay::default()`.
#[derive(Debug, Clone)]
pub struct DelaySelectionThresholds {
    /// Threshold used during the initial phase before convergence (default: 5).
    pub initial: i32,
    /// Threshold used after the delay estimator has converged (default: 20).
    pub converged: i32,
}
391
/// Multichannel alignment mixing strategy.
///
/// This type has no `Default` impl of its own; `Delay::default()` supplies
/// the values for the render and capture variants.
#[derive(Debug, Clone)]
pub struct AlignmentMixing {
    /// Whether to downmix multiple channels to mono for alignment.
    pub downmix: bool,
    /// Whether to adaptively select the best channel for alignment.
    pub adaptive_selection: bool,
    /// Power threshold for considering a channel as active (default: 10000.0).
    pub activity_power_threshold: f32,
    /// Whether to prefer the first two channels when selecting alignment reference.
    pub prefer_first_two_channels: bool,
}
404
405/// Delay estimation and alignment parameters.
406#[derive(Debug, Clone)]
407pub struct Delay {
408    /// Default delay in blocks before estimation converges (default: 5).
409    pub default_delay: usize,
410    /// Down-sampling factor for the delay estimator; must be 4 or 8 (default: 4).
411    pub down_sampling_factor: usize,
412    /// Number of correlator filters used for delay estimation (default: 5).
413    pub num_filters: usize,
414    /// Extra headroom in samples added to the estimated delay (default: 32).
415    pub delay_headroom_samples: usize,
416    /// Hysteresis in blocks before accepting a new delay estimate (default: 1).
417    pub hysteresis_limit_blocks: usize,
418    /// Fixed capture delay override in samples; 0 means use estimation (default: 0).
419    pub fixed_capture_delay_samples: usize,
420    /// Smoothing factor for delay estimates in [0, 1] (default: 0.7).
421    pub delay_estimate_smoothing: f32,
422    /// Smoothing factor for delay estimates after delay is found (default: 0.7).
423    pub delay_estimate_smoothing_delay_found: f32,
424    /// Correlation threshold for detecting a delay candidate (default: 0.2).
425    pub delay_candidate_detection_threshold: f32,
426    /// Convergence thresholds for delay selection.
427    pub delay_selection_thresholds: DelaySelectionThresholds,
428    /// Whether to use an externally provided delay estimate.
429    pub use_external_delay_estimator: bool,
430    /// Whether to log warnings when the delay estimate changes.
431    pub log_warning_on_delay_changes: bool,
432    /// Alignment mixing settings for the render signal.
433    pub render_alignment_mixing: AlignmentMixing,
434    /// Alignment mixing settings for the capture signal.
435    pub capture_alignment_mixing: AlignmentMixing,
436    /// Whether to detect and compensate for pre-echo artifacts.
437    pub detect_pre_echo: bool,
438}
439
440impl Default for Delay {
441    fn default() -> Self {
442        Self {
443            default_delay: 5,
444            down_sampling_factor: 4,
445            num_filters: 5,
446            delay_headroom_samples: 32,
447            hysteresis_limit_blocks: 1,
448            fixed_capture_delay_samples: 0,
449            delay_estimate_smoothing: 0.7,
450            delay_estimate_smoothing_delay_found: 0.7,
451            delay_candidate_detection_threshold: 0.2,
452            delay_selection_thresholds: DelaySelectionThresholds {
453                initial: 5,
454                converged: 20,
455            },
456            use_external_delay_estimator: false,
457            log_warning_on_delay_changes: false,
458            render_alignment_mixing: AlignmentMixing {
459                downmix: false,
460                adaptive_selection: true,
461                activity_power_threshold: 10000.0,
462                prefer_first_two_channels: true,
463            },
464            capture_alignment_mixing: AlignmentMixing {
465                downmix: false,
466                adaptive_selection: true,
467                activity_power_threshold: 10000.0,
468                prefer_first_two_channels: false,
469            },
470            detect_pre_echo: true,
471        }
472    }
473}
474
/// Configuration for the refined (main) adaptive filter.
///
/// This type has no `Default` impl of its own; the documented defaults are
/// the values assigned by `Filter::default()`, where "initial" refers to the
/// `refined_initial` variant.
#[derive(Debug, Clone)]
pub struct RefinedConfiguration {
    /// Filter length in blocks (default: 13, initial: 12).
    pub length_blocks: usize,
    /// Leakage factor when the filter has converged (default: 0.00005, initial: 0.005).
    pub leakage_converged: f32,
    /// Leakage factor when the filter has diverged (default: 0.05, initial: 0.5).
    pub leakage_diverged: f32,
    /// Minimum error floor to prevent division by zero (default: 0.001).
    pub error_floor: f32,
    /// Maximum error ceiling to limit adaptation (default: 2.0).
    pub error_ceil: f32,
    /// Power threshold below which adaptation is gated (default: 20075344.0).
    pub noise_gate: f32,
}
491
/// Configuration for the coarse (shadow) adaptive filter.
///
/// This type has no `Default` impl of its own; the documented defaults are
/// the values assigned by `Filter::default()`, where "initial" refers to the
/// `coarse_initial` variant.
#[derive(Debug, Clone)]
pub struct CoarseConfiguration {
    /// Filter length in blocks (default: 13, initial: 12).
    pub length_blocks: usize,
    /// Adaptation step-size rate in [0, 1] (default: 0.7, initial: 0.9).
    pub rate: f32,
    /// Power threshold below which adaptation is gated (default: 20075344.0).
    pub noise_gate: f32,
}
502
503/// Adaptive filter adaptation settings.
504#[derive(Debug, Clone)]
505pub struct Filter {
506    /// Refined (main) adaptive filter configuration.
507    pub refined: RefinedConfiguration,
508    /// Coarse (shadow) adaptive filter configuration.
509    pub coarse: CoarseConfiguration,
510    /// Refined filter configuration used during the initial phase.
511    pub refined_initial: RefinedConfiguration,
512    /// Coarse filter configuration used during the initial phase.
513    pub coarse_initial: CoarseConfiguration,
514    /// Duration in blocks for transitioning between config changes (default: 250).
515    pub config_change_duration_blocks: usize,
516    /// Duration in seconds of the initial adaptation phase (default: 2.5).
517    pub initial_state_seconds: f32,
518    /// Hangover in blocks after a coarse filter reset (default: 25).
519    pub coarse_reset_hangover_blocks: i32,
520    /// Whether to use a conservative strategy during the initial phase.
521    pub conservative_initial_phase: bool,
522    /// Whether to allow using the coarse filter output for echo subtraction.
523    pub enable_coarse_filter_output_usage: bool,
524    /// Whether to use the linear adaptive filter for echo removal.
525    pub use_linear_filter: bool,
526    /// Whether to high-pass filter the echo reference signal.
527    pub high_pass_filter_echo_reference: bool,
528    /// Whether to export the linear AEC output for external use.
529    pub export_linear_aec_output: bool,
530}
531
532impl Default for Filter {
533    fn default() -> Self {
534        Self {
535            refined: RefinedConfiguration {
536                length_blocks: 13,
537                leakage_converged: 0.00005,
538                leakage_diverged: 0.05,
539                error_floor: 0.001,
540                error_ceil: 2.0,
541                noise_gate: 20_075_344.0,
542            },
543            coarse: CoarseConfiguration {
544                length_blocks: 13,
545                rate: 0.7,
546                noise_gate: 20_075_344.0,
547            },
548            refined_initial: RefinedConfiguration {
549                length_blocks: 12,
550                leakage_converged: 0.005,
551                leakage_diverged: 0.5,
552                error_floor: 0.001,
553                error_ceil: 2.0,
554                noise_gate: 20_075_344.0,
555            },
556            coarse_initial: CoarseConfiguration {
557                length_blocks: 12,
558                rate: 0.9,
559                noise_gate: 20_075_344.0,
560            },
561            config_change_duration_blocks: 250,
562            initial_state_seconds: 2.5,
563            coarse_reset_hangover_blocks: 25,
564            conservative_initial_phase: false,
565            enable_coarse_filter_output_usage: true,
566            use_linear_filter: true,
567            high_pass_filter_echo_reference: false,
568            export_linear_aec_output: false,
569        }
570    }
571}
572
/// Echo Return Loss Enhancement (ERLE) estimation parameters.
#[derive(Debug, Clone)]
pub struct Erle {
    /// Minimum ERLE value in linear scale (default: 1.0).
    ///
    /// `EchoCanceller3Config::validate()` lowers this to
    /// `min(max_l, max_h)` if it exceeds either maximum.
    pub min: f32,
    /// Maximum ERLE for LF bands in linear scale (default: 4.0).
    pub max_l: f32,
    /// Maximum ERLE for HF bands in linear scale (default: 1.5).
    pub max_h: f32,
    /// Whether to use onset detection to reset ERLE estimates (default: true).
    pub onset_detection: bool,
    /// Number of sections used for ERLE estimation (default: 1).
    ///
    /// `EchoCanceller3Config::validate()` bounds this to
    /// `[1, filter.refined.length_blocks]`, so the sections presumably
    /// partition the refined filter rather than the spectrum — TODO confirm.
    pub num_sections: usize,
    /// Whether to clamp the filter quality estimate at zero (default: true).
    pub clamp_quality_estimate_to_zero: bool,
    /// Whether to clamp the filter quality estimate at one (default: true).
    pub clamp_quality_estimate_to_one: bool,
}

impl Default for Erle {
    /// Returns the default ERLE estimation settings.
    fn default() -> Self {
        Self {
            min: 1.0,
            max_l: 4.0,
            max_h: 1.5,
            onset_detection: true,
            num_sections: 1,
            clamp_quality_estimate_to_zero: true,
            clamp_quality_estimate_to_one: true,
        }
    }
}
605
/// Echo path strength and suppression gain parameters.
#[derive(Debug, Clone)]
pub struct EpStrength {
    /// Default echo path gain applied to the suppressor (default: 1.0).
    pub default_gain: f32,
    /// Echo path tail length as a fraction in [-1, 1] (default: 0.83).
    pub default_len: f32,
    /// Echo path tail length during dominant nearend in [-1, 1] (default: 0.83).
    pub nearend_len: f32,
    /// Whether the echo path can introduce saturation/clipping (default: true).
    pub echo_can_saturate: bool,
    /// Whether to bound the ERL estimate (default: false).
    pub bounded_erl: bool,
    /// Whether to compensate ERLE onset during dominant nearend detection (default: false).
    pub erle_onset_compensation_in_dominant_nearend: bool,
    /// Whether to use a conservative tail frequency response estimate (default: true).
    pub use_conservative_tail_frequency_response: bool,
}

impl Default for EpStrength {
    /// Returns the default echo path strength settings.
    fn default() -> Self {
        Self {
            default_gain: 1.0,
            default_len: 0.83,
            nearend_len: 0.83,
            echo_can_saturate: true,
            bounded_erl: false,
            erle_onset_compensation_in_dominant_nearend: false,
            use_conservative_tail_frequency_response: true,
        }
    }
}
638
/// Echo audibility detection parameters.
#[derive(Debug, Clone)]
pub struct EchoAudibility {
    /// Render power threshold for low-activity detection (default: 256.0).
    pub low_render_limit: f32,
    /// Render power threshold for normal-activity detection (default: 64.0).
    pub normal_render_limit: f32,
    /// Minimum floor power for audibility computation (default: 128.0).
    pub floor_power: f32,
    /// Audibility threshold for LF bands (default: 10.0).
    pub audibility_threshold_lf: f32,
    /// Audibility threshold for mid-frequency bands (default: 10.0).
    pub audibility_threshold_mf: f32,
    /// Audibility threshold for HF bands (default: 10.0).
    pub audibility_threshold_hf: f32,
    /// Whether to use signal stationarity properties for audibility detection (default: false).
    pub use_stationarity_properties: bool,
    /// Whether to use stationarity properties during the initial phase (default: false).
    pub use_stationarity_properties_at_init: bool,
}

impl Default for EchoAudibility {
    /// Returns the default echo audibility settings.
    fn default() -> Self {
        Self {
            // Written as multiples of 64.0 to mirror `normal_render_limit`.
            low_render_limit: 4.0 * 64.0,
            normal_render_limit: 64.0,
            floor_power: 2.0 * 64.0,
            audibility_threshold_lf: 10.0,
            audibility_threshold_mf: 10.0,
            audibility_threshold_hf: 10.0,
            use_stationarity_properties: false,
            use_stationarity_properties_at_init: false,
        }
    }
}
674
/// Render signal level thresholds.
#[derive(Debug, Clone)]
pub struct RenderLevels {
    /// Power threshold above which the render signal is considered active (default: 100.0).
    pub active_render_limit: f32,
    /// Power threshold below which render excitation is considered poor (default: 150.0).
    pub poor_excitation_render_limit: f32,
    /// Poor excitation threshold for 8x down-sampled signals (default: 20.0).
    pub poor_excitation_render_limit_ds8: f32,
    /// Gain in dB applied to the render power estimate (default: 0.0).
    ///
    /// Not range-checked by `EchoCanceller3Config::validate()`.
    pub render_power_gain_db: f32,
}

impl Default for RenderLevels {
    /// Returns the default render level thresholds.
    fn default() -> Self {
        Self {
            active_render_limit: 100.0,
            poor_excitation_render_limit: 150.0,
            poor_excitation_render_limit_ds8: 20.0,
            render_power_gain_db: 0.0,
        }
    }
}
698
/// Selects the transparent mode algorithm for AEC3.
///
/// Transparent mode detects scenarios where no echo is present (e.g. headset
/// use) and reduces suppression accordingly.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum TransparentModeType {
    /// Counter-based heuristic (the default).
    #[default]
    Legacy,
    /// Two-state Hidden Markov Model classifier.
    ///
    /// Uses Bayesian inference on filter convergence observations to estimate
    /// the probability of being in a "transparent" (no-echo) state. Generally
    /// more responsive than Legacy mode.
    Hmm,
}
715
716/// Top-level echo removal control settings.
717#[derive(Debug, Clone, Default)]
718pub struct EchoRemovalControl {
719    /// Whether the render and capture clocks are drifting relative to each other.
720    pub has_clock_drift: bool,
721    /// Whether the echo path is linear and stable (e.g. loopback scenarios).
722    pub linear_and_stable_echo_path: bool,
723    /// Which transparent mode algorithm to use.
724    pub transparent_mode: TransparentModeType,
725}
726
/// Echo and noise model parameters.
#[derive(Debug, Clone)]
pub struct EchoModel {
    /// Number of blocks to hold the noise floor estimate (default: 50).
    pub noise_floor_hold: usize,
    /// Minimum noise floor power level (default: 1638400.0).
    pub min_noise_floor_power: f32,
    /// Slope of the stationarity gate function (default: 10.0).
    pub stationary_gate_slope: f32,
    /// Power threshold for the noise gate (default: 27509.42).
    pub noise_gate_power: f32,
    /// Slope of the noise gate transition (default: 0.3).
    pub noise_gate_slope: f32,
    /// Number of blocks before the current block in the render window (default: 1).
    pub render_pre_window_size: usize,
    /// Number of blocks after the current block in the render window (default: 1).
    pub render_post_window_size: usize,
    /// Whether to model reverb in nonlinear processing mode (default: true).
    pub model_reverb_in_nonlinear_mode: bool,
}

impl Default for EchoModel {
    /// Returns the default echo and noise model parameters.
    fn default() -> Self {
        Self {
            noise_floor_hold: 50,
            min_noise_floor_power: 1_638_400.0,
            stationary_gate_slope: 10.0,
            noise_gate_power: 27509.42,
            noise_gate_slope: 0.3,
            render_pre_window_size: 1,
            render_post_window_size: 1,
            model_reverb_in_nonlinear_mode: true,
        }
    }
}
762
/// Comfort noise generation settings.
#[derive(Debug, Clone)]
pub struct ComfortNoise {
    /// Noise floor level in dBFS for comfort noise injection (default: -96.03406).
    ///
    /// `EchoCanceller3Config::validate()` clamps this to `[-200, 0]`.
    pub noise_floor_dbfs: f32,
}

impl Default for ComfortNoise {
    /// Returns the default comfort noise settings.
    fn default() -> Self {
        Self {
            noise_floor_dbfs: -96.03406,
        }
    }
}
777
/// Suppression masking thresholds based on ENR and EMR.
///
/// When part of a `Tuning`, every field is clamped to `[0, 100]` by
/// `EchoCanceller3Config::validate()`.
#[derive(Debug, Clone)]
pub struct MaskingThresholds {
    /// ENR threshold below which the signal is treated as transparent (no suppression).
    pub enr_transparent: f32,
    /// ENR threshold above which full suppression is applied.
    pub enr_suppress: f32,
    /// EMR threshold below which the signal is treated as transparent.
    pub emr_transparent: f32,
}
788
789/// Suppressor tuning with LF/HF masking thresholds and gain limits.
790#[derive(Debug, Clone)]
791pub struct Tuning {
792    /// Masking thresholds for LF bands.
793    pub mask_lf: MaskingThresholds,
794    /// Masking thresholds for HF bands.
795    pub mask_hf: MaskingThresholds,
796    /// Maximum gain increase factor per block (default: 2.0).
797    pub max_inc_factor: f32,
798    /// Maximum gain decrease factor for LF bands per block (default: 0.25).
799    pub max_dec_factor_lf: f32,
800}
801
/// Dominant nearend speech detection parameters.
#[derive(Debug, Clone)]
pub struct DominantNearendDetection {
    /// ENR threshold to enter nearend-dominant state (default: 0.25).
    pub enr_threshold: f32,
    /// ENR threshold to exit nearend-dominant state (default: 10.0).
    ///
    /// Not range-checked by `EchoCanceller3Config::validate()`.
    pub enr_exit_threshold: f32,
    /// SNR threshold for nearend detection (default: 30.0).
    pub snr_threshold: f32,
    /// Number of blocks to hold the nearend-dominant state (default: 50).
    pub hold_duration: i32,
    /// Number of bands that must exceed the threshold to trigger (default: 12).
    pub trigger_threshold: i32,
    /// Whether to use nearend detection during the initial adaptation phase (default: true).
    pub use_during_initial_phase: bool,
    /// Whether to use an unbounded echo spectrum estimate for detection (default: true).
    pub use_unbounded_echo_spectrum: bool,
}

impl Default for DominantNearendDetection {
    /// Returns the default dominant nearend detection settings.
    fn default() -> Self {
        Self {
            enr_threshold: 0.25,
            enr_exit_threshold: 10.0,
            snr_threshold: 30.0,
            hold_duration: 50,
            trigger_threshold: 12,
            use_during_initial_phase: true,
            use_unbounded_echo_spectrum: true,
        }
    }
}
834
/// A range of frequency bins, described by its lowest and highest bin index.
#[derive(Clone, Debug)]
pub struct SubbandRegion {
    /// Index of the lowest frequency bin in the range (inclusive).
    pub low: usize,
    /// Index of the highest frequency bin in the range (inclusive).
    pub high: usize,
}
843
844/// Subband-based nearend speech detection parameters.
845#[derive(Debug, Clone)]
846pub struct SubbandNearendDetection {
847    /// Number of blocks to average for nearend power estimation (default: 1).
848    pub nearend_average_blocks: usize,
849    /// First subband region for nearend detection.
850    pub subband1: SubbandRegion,
851    /// Second subband region for nearend detection.
852    pub subband2: SubbandRegion,
853    /// Nearend power threshold for detection (default: 1.0).
854    pub nearend_threshold: f32,
855    /// SNR threshold for subband nearend detection (default: 1.0).
856    pub snr_threshold: f32,
857}
858
859impl Default for SubbandNearendDetection {
860    fn default() -> Self {
861        Self {
862            nearend_average_blocks: 1,
863            subband1: SubbandRegion { low: 1, high: 1 },
864            subband2: SubbandRegion { low: 1, high: 1 },
865            nearend_threshold: 1.0,
866            snr_threshold: 1.0,
867        }
868    }
869}
870
/// Settings for suppression in the high bands and for anti-howling.
#[derive(Clone, Debug)]
pub struct HighBandsSuppression {
    /// ENR threshold at which high-band suppression is activated
    /// (default: 1.0).
    pub enr_threshold: f32,
    /// Upper limit on the gain applied to the high bands during echo
    /// (default: 1.0).
    pub max_gain_during_echo: f32,
    /// Power threshold at which anti-howling protection is activated
    /// (default: 400.0).
    pub anti_howling_activation_threshold: f32,
    /// Gain used while anti-howling is active (default: 1.0).
    pub anti_howling_gain: f32,
}

impl Default for HighBandsSuppression {
    /// Returns the built-in high-band suppression settings.
    fn default() -> Self {
        HighBandsSuppression {
            // High-band suppression.
            enr_threshold: 1.0,
            max_gain_during_echo: 1.0,
            // Anti-howling.
            anti_howling_gain: 1.0,
            anti_howling_activation_threshold: 400.0,
        }
    }
}
894
/// Parameters for limiting the gain in the HF bands.
#[derive(Clone, Debug)]
pub struct HighFrequencySuppression {
    /// Band index at which HF gain limiting starts (default: 16).
    pub limiting_gain_band: i32,
    /// How many bands HF gain limiting is applied over (default: 1).
    pub bands_in_limiting_gain: i32,
}

impl Default for HighFrequencySuppression {
    /// Returns the built-in HF gain limiting settings.
    fn default() -> Self {
        HighFrequencySuppression {
            bands_in_limiting_gain: 1,
            limiting_gain_band: 16,
        }
    }
}
912
913/// Top-level suppressor configuration.
914#[derive(Debug, Clone)]
915pub struct Suppressor {
916    /// Number of blocks to average for nearend power estimation (default: 4).
917    pub nearend_average_blocks: usize,
918    /// Tuning parameters used during normal (non-nearend) operation.
919    pub normal_tuning: Tuning,
920    /// Tuning parameters used during dominant nearend conditions.
921    pub nearend_tuning: Tuning,
922    /// Whether to apply LF gain smoothing during the initial adaptation phase.
923    pub lf_smoothing_during_initial_phase: bool,
924    /// Last band index with permanent LF gain smoothing (default: 0).
925    pub last_permanent_lf_smoothing_band: i32,
926    /// Last band index with LF gain smoothing (default: 5).
927    pub last_lf_smoothing_band: i32,
928    /// Last band index considered as LF (default: 5).
929    pub last_lf_band: i32,
930    /// First band index considered as HF (default: 8).
931    pub first_hf_band: i32,
932    /// Dominant nearend speech detection settings.
933    pub dominant_nearend_detection: DominantNearendDetection,
934    /// Subband-based nearend detection settings.
935    pub subband_nearend_detection: SubbandNearendDetection,
936    /// Whether to use subband nearend detection instead of dominant nearend detection.
937    pub use_subband_nearend_detection: bool,
938    /// High-band suppression and anti-howling settings.
939    pub high_bands_suppression: HighBandsSuppression,
940    /// HF gain limiting settings.
941    pub high_frequency_suppression: HighFrequencySuppression,
942    /// Initial suppression gain floor increase step (default: 0.00001).
943    pub floor_first_increase: f32,
944    /// Whether to apply conservative suppression in HF bands.
945    pub conservative_hf_suppression: bool,
946}
947
948impl Default for Suppressor {
949    fn default() -> Self {
950        Self {
951            nearend_average_blocks: 4,
952            normal_tuning: Tuning {
953                mask_lf: MaskingThresholds {
954                    enr_transparent: 0.3,
955                    enr_suppress: 0.4,
956                    emr_transparent: 0.3,
957                },
958                mask_hf: MaskingThresholds {
959                    enr_transparent: 0.07,
960                    enr_suppress: 0.1,
961                    emr_transparent: 0.3,
962                },
963                max_inc_factor: 2.0,
964                max_dec_factor_lf: 0.25,
965            },
966            nearend_tuning: Tuning {
967                mask_lf: MaskingThresholds {
968                    enr_transparent: 1.09,
969                    enr_suppress: 1.1,
970                    emr_transparent: 0.3,
971                },
972                mask_hf: MaskingThresholds {
973                    enr_transparent: 0.1,
974                    enr_suppress: 0.3,
975                    emr_transparent: 0.3,
976                },
977                max_inc_factor: 2.0,
978                max_dec_factor_lf: 0.25,
979            },
980            lf_smoothing_during_initial_phase: true,
981            last_permanent_lf_smoothing_band: 0,
982            last_lf_smoothing_band: 5,
983            last_lf_band: 5,
984            first_hf_band: 8,
985            dominant_nearend_detection: DominantNearendDetection::default(),
986            subband_nearend_detection: SubbandNearendDetection::default(),
987            use_subband_nearend_detection: false,
988            high_bands_suppression: HighBandsSuppression::default(),
989            high_frequency_suppression: HighFrequencySuppression::default(),
990            floor_first_increase: 0.00001,
991            conservative_hf_suppression: false,
992        }
993    }
994}
995
/// Settings for multichannel processing and stereo content detection.
#[derive(Clone, Debug)]
pub struct MultiChannel {
    /// Enables detection of stereo content so processing can adapt to it.
    pub detect_stereo_content: bool,
    /// Threshold on the power difference used for stereo detection
    /// (default: 0.0).
    pub stereo_detection_threshold: f32,
    /// Timeout, in seconds, before stereo detection is reset (default: 300).
    pub stereo_detection_timeout_threshold_seconds: i32,
    /// Hysteresis, in seconds, applied to stereo detection state changes
    /// (default: 2.0).
    pub stereo_detection_hysteresis_seconds: f32,
}

impl Default for MultiChannel {
    /// Returns the built-in multichannel settings.
    fn default() -> Self {
        MultiChannel {
            detect_stereo_content: true,
            stereo_detection_threshold: 0.0,
            // Both durations below are expressed in seconds.
            stereo_detection_hysteresis_seconds: 2.0,
            stereo_detection_timeout_threshold_seconds: 300,
        }
    }
}
1019
#[cfg(test)]
mod tests {
    use super::*;

    /// `validate()` must correct out-of-range values and report, by returning
    /// `false`, that it had to change something.
    #[test]
    fn out_of_range_values_are_clamped() {
        let mut config = EchoCanceller3Config::default();
        // Only 4 and 8 are accepted as down-sampling factors.
        config.delay.down_sampling_factor = 3;
        // Exceeds the upper limit of 100_000.
        config.erle.min = 200_000.0;

        let unchanged = config.validate();

        assert!(!unchanged);
        assert_eq!(config.delay.down_sampling_factor, 4);

        // `erle.min` is first limited to 100_000; the subsequent
        // `min > max_l || min > max_h` check then lowers it again to
        // min(max_l = 4.0, max_h = 1.5) = 1.5.
        let deviation = (config.erle.min - 1.5).abs();
        assert!(deviation < 0.01);
    }
}
sonora_aec3/config.rs

sonora_aec3/
config.rs