webrtc-audio-processing-sys 2.0.2

A wrapper for WebRTC's AudioProcessing module.
/*
 *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "modules/audio_processing/gain_controller2.h"

#include <memory>
#include <utility>

#include "api/audio/audio_frame.h"
#include "common_audio/include/audio_util.h"
#include "modules/audio_processing/agc2/agc2_common.h"
#include "modules/audio_processing/agc2/cpu_features.h"
#include "modules/audio_processing/audio_buffer.h"
#include "modules/audio_processing/include/audio_frame_view.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"
#include "rtc_base/checks.h"
#include "rtc_base/logging.h"
#include "rtc_base/strings/string_builder.h"
#include "system_wrappers/include/field_trial.h"

namespace webrtc {
namespace {

using Agc2Config = AudioProcessing::Config::GainController2;
using InputVolumeControllerConfig = InputVolumeController::Config;

constexpr int kLogLimiterStatsPeriodMs = 30'000;
constexpr int kFrameLengthMs = 10;
constexpr int kLogLimiterStatsPeriodNumFrames =
    kLogLimiterStatsPeriodMs / kFrameLengthMs;

// Detects the available CPU features and applies any kill-switches.
AvailableCpuFeatures GetAllowedCpuFeatures() {
  AvailableCpuFeatures features = GetAvailableCpuFeatures();
  if (field_trial::IsEnabled("WebRTC-Agc2SimdSse2KillSwitch")) {
    features.sse2 = false;
  }
  if (field_trial::IsEnabled("WebRTC-Agc2SimdAvx2KillSwitch")) {
    features.avx2 = false;
  }
  if (field_trial::IsEnabled("WebRTC-Agc2SimdNeonKillSwitch")) {
    features.neon = false;
  }
  return features;
}

// Peak and RMS audio levels in dBFS.
struct AudioLevels {
  float peak_dbfs;
  float rms_dbfs;
};

// Speech level info.
struct SpeechLevel {
  bool is_confident;
  float rms_dbfs;
};

// Computes the audio levels for the first channel in `frame`.
AudioLevels ComputeAudioLevels(DeinterleavedView<float> frame,
                               ApmDataDumper& data_dumper) {
  float peak = 0.0f;
  float rms = 0.0f;
  for (const auto& x : frame[0]) {
    peak = std::max(std::fabs(x), peak);
    rms += x * x;
  }
  AudioLevels levels{
      FloatS16ToDbfs(peak),
      FloatS16ToDbfs(std::sqrt(rms / frame.samples_per_channel()))};
  data_dumper.DumpRaw("agc2_input_rms_dbfs", levels.rms_dbfs);
  data_dumper.DumpRaw("agc2_input_peak_dbfs", levels.peak_dbfs);
  return levels;
}

}  // namespace

std::atomic<int> GainController2::instance_count_(0);

GainController2::GainController2(
    const Agc2Config& config,
    const InputVolumeControllerConfig& input_volume_controller_config,
    int sample_rate_hz,
    int num_channels,
    bool use_internal_vad)
    : cpu_features_(GetAllowedCpuFeatures()),
      data_dumper_(instance_count_.fetch_add(1) + 1),
      fixed_gain_applier_(
          /*hard_clip_samples=*/false,
          /*initial_gain_factor=*/DbToRatio(config.fixed_digital.gain_db)),
      limiter_(&data_dumper_,
               SampleRateToDefaultChannelSize(sample_rate_hz),
               /*histogram_name_prefix=*/"Agc2"),
      calls_since_last_limiter_log_(0) {
  RTC_DCHECK(Validate(config));
  data_dumper_.InitiateNewSetOfRecordings();

  if (config.input_volume_controller.enabled ||
      config.adaptive_digital.enabled) {
    // Create dependencies.
    speech_level_estimator_ = std::make_unique<SpeechLevelEstimator>(
        &data_dumper_, config.adaptive_digital, kAdjacentSpeechFramesThreshold);
    if (use_internal_vad)
      vad_ = std::make_unique<VoiceActivityDetectorWrapper>(
          kVadResetPeriodMs, cpu_features_, sample_rate_hz);
  }

  if (config.input_volume_controller.enabled) {
    // Create controller.
    input_volume_controller_ = std::make_unique<InputVolumeController>(
        num_channels, input_volume_controller_config);
    // TODO(bugs.webrtc.org/7494): Call `Initialize` in ctor and remove method.
    input_volume_controller_->Initialize();
  }

  if (config.adaptive_digital.enabled) {
    // Create dependencies.
    noise_level_estimator_ = CreateNoiseFloorEstimator(&data_dumper_);
    saturation_protector_ = CreateSaturationProtector(
        kSaturationProtectorInitialHeadroomDb, kAdjacentSpeechFramesThreshold,
        &data_dumper_);
    // Create controller.
    adaptive_digital_controller_ =
        std::make_unique<AdaptiveDigitalGainController>(
            &data_dumper_, config.adaptive_digital,
            kAdjacentSpeechFramesThreshold);
  }
}

GainController2::~GainController2() = default;

// TODO(webrtc:7494): Pass the flag also to the other components.
void GainController2::SetCaptureOutputUsed(bool capture_output_used) {
  if (input_volume_controller_) {
    input_volume_controller_->HandleCaptureOutputUsedChange(
        capture_output_used);
  }
}

void GainController2::SetFixedGainDb(float gain_db) {
  const float gain_factor = DbToRatio(gain_db);
  if (fixed_gain_applier_.GetGainFactor() != gain_factor) {
    // Reset the limiter to quickly react on abrupt level changes caused by
    // large changes of the fixed gain.
    limiter_.Reset();
  }
  fixed_gain_applier_.SetGainFactor(gain_factor);
}

void GainController2::Analyze(int applied_input_volume,
                              const AudioBuffer& audio_buffer) {
  recommended_input_volume_ = std::nullopt;

  RTC_DCHECK_GE(applied_input_volume, 0);
  RTC_DCHECK_LE(applied_input_volume, 255);

  if (input_volume_controller_) {
    input_volume_controller_->AnalyzeInputAudio(applied_input_volume,
                                                audio_buffer);
  }
}

void GainController2::Process(std::optional<float> speech_probability,
                              bool input_volume_changed,
                              AudioBuffer* audio) {
  recommended_input_volume_ = std::nullopt;

  data_dumper_.DumpRaw("agc2_applied_input_volume_changed",
                       input_volume_changed);
  if (input_volume_changed) {
    // Handle input volume changes.
    if (speech_level_estimator_)
      speech_level_estimator_->Reset();
    if (saturation_protector_)
      saturation_protector_->Reset();
  }

  DeinterleavedView<float> float_frame = audio->view();

  // Compute speech probability.
  if (vad_) {
    // When the VAD component runs, `speech_probability` should not be specified
    // because APM should not run the same VAD twice (as an APM sub-module and
    // internally in AGC2).
    RTC_DCHECK(!speech_probability.has_value());
    speech_probability = vad_->Analyze(float_frame);
  }
  if (speech_probability.has_value()) {
    RTC_DCHECK_GE(*speech_probability, 0.0f);
    RTC_DCHECK_LE(*speech_probability, 1.0f);
  }
  // The speech probability may not be defined at this step (e.g., when the
  // fixed digital controller alone is enabled).
  if (speech_probability.has_value())
    data_dumper_.DumpRaw("agc2_speech_probability", *speech_probability);

  // Compute audio, noise and speech levels.
  AudioLevels audio_levels = ComputeAudioLevels(float_frame, data_dumper_);
  std::optional<float> noise_rms_dbfs;
  if (noise_level_estimator_) {
    // TODO(bugs.webrtc.org/7494): Pass `audio_levels` to remove duplicated
    // computation in `noise_level_estimator_`.
    noise_rms_dbfs = noise_level_estimator_->Analyze(float_frame);
  }
  std::optional<SpeechLevel> speech_level;
  if (speech_level_estimator_) {
    RTC_DCHECK(speech_probability.has_value());
    speech_level_estimator_->Update(
        audio_levels.rms_dbfs, audio_levels.peak_dbfs, *speech_probability);
    speech_level =
        SpeechLevel{.is_confident = speech_level_estimator_->is_confident(),
                    .rms_dbfs = speech_level_estimator_->level_dbfs()};
  }

  // Update the recommended input volume.
  if (input_volume_controller_) {
    RTC_DCHECK(speech_level.has_value());
    RTC_DCHECK(speech_probability.has_value());
    if (speech_probability.has_value()) {
      recommended_input_volume_ =
          input_volume_controller_->RecommendInputVolume(
              *speech_probability,
              speech_level->is_confident
                  ? std::optional<float>(speech_level->rms_dbfs)
                  : std::nullopt);
    }
  }

  if (adaptive_digital_controller_) {
    RTC_DCHECK(saturation_protector_);
    RTC_DCHECK(speech_probability.has_value());
    RTC_DCHECK(speech_level.has_value());
    saturation_protector_->Analyze(*speech_probability, audio_levels.peak_dbfs,
                                   speech_level->rms_dbfs);
    float headroom_db = saturation_protector_->HeadroomDb();
    data_dumper_.DumpRaw("agc2_headroom_db", headroom_db);
    float limiter_envelope_dbfs = FloatS16ToDbfs(limiter_.LastAudioLevel());
    data_dumper_.DumpRaw("agc2_limiter_envelope_dbfs", limiter_envelope_dbfs);
    RTC_DCHECK(noise_rms_dbfs.has_value());
    adaptive_digital_controller_->Process(
        /*info=*/{.speech_probability = *speech_probability,
                  .speech_level_dbfs = speech_level->rms_dbfs,
                  .speech_level_reliable = speech_level->is_confident,
                  .noise_rms_dbfs = *noise_rms_dbfs,
                  .headroom_db = headroom_db,
                  .limiter_envelope_dbfs = limiter_envelope_dbfs},
        float_frame);
  }

  // TODO(bugs.webrtc.org/7494): Pass `audio_levels` to remove duplicated
  // computation in `limiter_`.
  fixed_gain_applier_.ApplyGain(float_frame);

  limiter_.Process(float_frame);

  // Periodically log limiter stats.
  if (++calls_since_last_limiter_log_ == kLogLimiterStatsPeriodNumFrames) {
    calls_since_last_limiter_log_ = 0;
    InterpolatedGainCurve::Stats stats = limiter_.GetGainCurveStats();
    RTC_LOG(LS_INFO) << "[AGC2] limiter stats"
                     << " | identity: " << stats.look_ups_identity_region
                     << " | knee: " << stats.look_ups_knee_region
                     << " | limiter: " << stats.look_ups_limiter_region
                     << " | saturation: " << stats.look_ups_saturation_region;
  }
}

bool GainController2::Validate(
    const AudioProcessing::Config::GainController2& config) {
  const auto& fixed = config.fixed_digital;
  const auto& adaptive = config.adaptive_digital;
  return fixed.gain_db >= 0.0f && fixed.gain_db < 50.0f &&
         adaptive.headroom_db >= 0.0f && adaptive.max_gain_db > 0.0f &&
         adaptive.initial_gain_db >= 0.0f &&
         adaptive.max_gain_change_db_per_second > 0.0f &&
         adaptive.max_output_noise_level_dbfs <= 0.0f;
}

}  // namespace webrtc