#include "feature-window.h"
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <limits>
#include <vector>
#include <iostream>
#include "kaldi-math.h"
namespace knf {
// Writes the text returned by opts.ToString() to `os` and hands the stream
// back so that further insertions can be chained.
std::ostream &operator<<(std::ostream &os, const FrameExtractionOptions &opts) {
  return os << opts.ToString();
}
// Builds the coefficients of an analysis window.
//
// @param window_type    One of "hanning", "sine", "hamming", "hann", "povey",
//                       "rectangular" or "blackman".
// @param window_size    Number of coefficients to generate; must be > 0
//                       (enforced with KNF_CHECK_GT).
// @param blackman_coeff Constant term of the "blackman" formula; ignored by
//                       every other window type.
// @return A vector holding `window_size` coefficients.
//
// An unrecognized `window_type` prints "Invalid window type '...'" to stderr
// and terminates the process with exit(-1).
//
// A one-sample window is returned as {1.0} for every type.
std::vector<float> GetWindow(const std::string &window_type,
                             int32_t window_size,
                             float blackman_coeff) {
  // Validate before allocating: a negative size would otherwise be converted
  // to a huge unsigned length and fail inside the vector constructor instead
  // of reaching this check.
  KNF_CHECK_GT(window_size, 0);

  // Resolve the type name once instead of string-comparing for every sample.
  enum class WindowKind {
    kHanning,
    kSine,
    kHamming,
    kHann,
    kPovey,
    kRectangular,
    kBlackman
  };

  WindowKind kind = WindowKind::kRectangular;
  if (window_type == "hanning") {
    kind = WindowKind::kHanning;
  } else if (window_type == "sine") {
    kind = WindowKind::kSine;
  } else if (window_type == "hamming") {
    kind = WindowKind::kHamming;
  } else if (window_type == "hann") {
    kind = WindowKind::kHann;
  } else if (window_type == "povey") {
    kind = WindowKind::kPovey;
  } else if (window_type == "rectangular") {
    kind = WindowKind::kRectangular;
  } else if (window_type == "blackman") {
    kind = WindowKind::kBlackman;
  } else {
    fprintf(stderr, "Invalid window type '%s'\n", window_type.c_str());
    exit(-1);
  }

  std::vector<float> window(window_size);

  // With a single sample the 2*pi / (N - 1) step below divides by zero and
  // the cosine-based formulas would produce NaN. Use the common convention
  // that a length-1 window is just 1.0.
  if (window_size == 1) {
    window[0] = 1.0f;
    return window;
  }

  // "hann" steps the angle by 2*pi / N; all other types use 2*pi / (N - 1).
  const double a = (kind == WindowKind::kHann) ? M_2PI / window_size
                                               : M_2PI / (window_size - 1);

  for (int32_t i = 0; i < window_size; ++i) {
    const double i_fl = static_cast<double>(i);
    double value = 0.0;
    switch (kind) {
      case WindowKind::kHanning:
      case WindowKind::kHann:
        // Same formula; the two types differ only in the step `a`.
        value = 0.5 - 0.5 * std::cos(a * i_fl);
        break;
      case WindowKind::kSine:
        value = std::sin(0.5 * a * i_fl);
        break;
      case WindowKind::kHamming:
        value = 0.54 - 0.46 * std::cos(a * i_fl);
        break;
      case WindowKind::kPovey:
        // Hanning raised to the power 0.85.
        value = std::pow(0.5 - 0.5 * std::cos(a * i_fl), 0.85);
        break;
      case WindowKind::kRectangular:
        value = 1.0;
        break;
      case WindowKind::kBlackman:
        value = blackman_coeff - 0.5 * std::cos(a * i_fl) +
                (0.5 - blackman_coeff) * std::cos(2 * a * i_fl);
        break;
    }
    window[i] = static_cast<float>(value);
  }
  return window;
}
// Builds the window described by `opts`: type opts.window_type, length
// opts.WindowSize() and Blackman coefficient opts.blackman_coeff, all handed
// to knf::GetWindow().
FeatureWindowFunction::FeatureWindowFunction(const FrameExtractionOptions &opts)
    : window_(knf::GetWindow(opts.window_type, opts.WindowSize(),
                             opts.blackman_coeff)) {}
// Builds the window from explicit parameters: the coefficients returned by
// knf::GetWindow(window_type, window_size, blackman_coeff) are stored in
// window_. Argument validation is whatever GetWindow() performs.
FeatureWindowFunction::FeatureWindowFunction(const std::string &window_type,
int32_t window_size,
float blackman_coeff )
: window_(knf::GetWindow(window_type, window_size, blackman_coeff)) {}
// Uses caller-supplied coefficients as the window: `window` is copied into
// window_ unchanged.
FeatureWindowFunction::FeatureWindowFunction(const std::vector<float> &window)
: window_(window) {}
void FeatureWindowFunction::Apply(float *wave) const {
int32_t window_size = window_.size();
const float *p = window_.data();
for (int32_t k = 0; k != window_size; ++k) {
wave[k] *= p[k];
}
}
// Returns the index of the first sample of frame number `frame`.
//
// With opts.snip_edges the frame starts at frame * WindowShift(). Otherwise
// the frame is centered on (frame * shift + shift / 2), so the returned index
// is that midpoint minus half of WindowSize(); it can be negative for the
// first frames.
int64_t FirstSampleOfFrame(int32_t frame, const FrameExtractionOptions &opts) {
  const int64_t shift = opts.WindowShift();

  if (opts.snip_edges) {
    return frame * shift;
  }

  const int64_t center = shift * frame + shift / 2;
  return center - opts.WindowSize() / 2;
}
// Returns how many frames can be extracted from `num_samples` samples.
//
// With opts.snip_edges only frames lying entirely inside the signal count:
// 0 if the signal is shorter than one window, else
// 1 + (num_samples - window) / shift.
//
// Without snip_edges the count is num_samples / shift rounded to nearest.
// If `flush` is true that number is returned directly; otherwise trailing
// frames whose end would extend past `num_samples` are dropped.
int32_t NumFrames(int64_t num_samples, const FrameExtractionOptions &opts,
                  bool flush) {
  const int64_t shift = opts.WindowShift();
  const int64_t length = opts.WindowSize();

  if (opts.snip_edges) {
    return static_cast<int32_t>(
        num_samples < length ? 0 : 1 + ((num_samples - length) / shift));
  }

  int32_t count = (num_samples + (shift / 2)) / shift;
  if (flush) {
    return count;
  }

  // Walk back from the last candidate frame until its end fits in the signal.
  for (int64_t last_end = FirstSampleOfFrame(count - 1, opts) + length;
       count > 0 && last_end > num_samples; last_end -= shift) {
    --count;
  }
  return count;
}
// Extracts frame `f` from `wave` into `*window` and runs ProcessWindow() on it.
//
// @param sample_offset  Number of samples of the full signal that precede
//                       wave[0]; must be >= 0.
// @param wave           The available samples; must be non-empty.
// @param f              Index of the frame to extract (see
//                       FirstSampleOfFrame()).
// @param opts           Frame extraction options.
// @param window_function Window applied by ProcessWindow().
// @param window         Output; resized to opts.PaddedWindowSize(). The first
//                       opts.WindowSize() entries receive the frame, the rest
//                       are set to zero.
// @param log_energy_pre_window Forwarded to ProcessWindow(); may be null.
//
// When the frame extends past either end of `wave`, the missing samples are
// obtained by reflecting the index around that end.
void ExtractWindow(int64_t sample_offset, const std::vector<float> &wave,
                   int32_t f, const FrameExtractionOptions &opts,
                   const FeatureWindowFunction &window_function,
                   std::vector<float> *window,
                   float *log_energy_pre_window) {
  KNF_CHECK(sample_offset >= 0 && wave.size() != 0);

  const int32_t frame_length = opts.WindowSize();
  const int32_t frame_length_padded = opts.PaddedWindowSize();
  const int64_t wave_size = static_cast<int64_t>(wave.size());
  const int64_t num_samples = sample_offset + wave_size;
  const int64_t start_sample = FirstSampleOfFrame(f, opts);
  const int64_t end_sample = start_sample + frame_length;

  if (opts.snip_edges) {
    KNF_CHECK(start_sample >= sample_offset && end_sample <= num_samples);
  } else {
    KNF_CHECK(sample_offset == 0 || start_sample >= sample_offset);
  }

  if (window->size() != static_cast<std::size_t>(frame_length_padded)) {
    window->resize(frame_length_padded);
  }

  // Always clear the padding. resize() only zero-fills newly added elements,
  // so a buffer reused from a previous frame (already the right size) could
  // otherwise carry stale values in [frame_length, frame_length_padded).
  if (frame_length_padded > frame_length) {
    std::fill(window->begin() + frame_length, window->end(), 0.0f);
  }

  // Start and end of the requested frame expressed as indexes into `wave`.
  const int32_t wave_start = static_cast<int32_t>(start_sample - sample_offset);
  const int32_t wave_end = wave_start + frame_length;

  if (wave_start >= 0 && static_cast<int64_t>(wave_end) <= wave_size) {
    // Common case: the frame lies completely inside `wave`.
    std::copy(wave.begin() + wave_start,
              wave.begin() + wave_start + frame_length, window->data());
  } else {
    // The frame sticks out of `wave`: reflect out-of-range indexes back in.
    // -1 -> 0, -2 -> 1, ...; dim -> dim - 1, dim + 1 -> dim - 2, ...
    // The loop repeats the reflection until the index is in range.
    const int32_t wave_dim = wave.size();
    for (int32_t s = 0; s < frame_length; ++s) {
      int32_t s_in_wave = s + wave_start;
      while (s_in_wave < 0 || s_in_wave >= wave_dim) {
        if (s_in_wave < 0) {
          s_in_wave = -s_in_wave - 1;
        } else {
          s_in_wave = 2 * wave_dim - 1 - s_in_wave;
        }
      }
      (*window)[s] = wave[s_in_wave];
    }
  }

  ProcessWindow(opts, window_function, window->data(), log_energy_pre_window);
}
// Subtracts the arithmetic mean of d[0..n-1] from every one of those samples,
// in place. The sum and the mean are accumulated in single precision.
static void RemoveDcOffset(float *d, int32_t n) {
  float *const end = d + n;

  float total = 0;
  for (const float *p = d; p != end; ++p) {
    total += *p;
  }

  const float offset = total / n;
  for (float *p = d; p != end; ++p) {
    *p -= offset;
  }
}
// Returns the dot product of the first `n` elements of `a` and `b`,
// accumulated in single precision from index 0 upwards. Returns 0 when n is 0.
float InnerProduct(const float *a, const float *b, int32_t n) {
  float acc = 0;
  for (const float *const a_end = a + n; a != a_end; ++a, ++b) {
    acc += (*a) * (*b);
  }
  return acc;
}
// Adds random noise to d[0..n-1] in place: each sample gets an independent
// RandGauss() draw multiplied by `dither_value`. A fresh RandomState is
// created on every call. Does nothing when `dither_value` is exactly 0.
void Dither(float *d, int32_t n, float dither_value) {
  if (dither_value != 0.0) {
    RandomState rstate;
    int32_t idx = 0;
    while (idx < n) {
      d[idx] += RandGauss(&rstate) * dither_value;
      ++idx;
    }
  }
}
// Applies the first-order pre-emphasis filter d[i] -= preemph_coeff * d[i-1]
// to d[0..n-1] in place.
//
// @param d             Samples to filter.
// @param n             Number of samples; nothing is touched when n <= 0.
// @param preemph_coeff Filter coefficient; 0 disables the filter, otherwise it
//                      must lie in [0, 1] (enforced with KNF_CHECK).
static void Preemphasize(float *d, int32_t n, float preemph_coeff) {
  if (preemph_coeff == 0.0) {
    return;
  }
  KNF_CHECK(preemph_coeff >= 0.0 && preemph_coeff <= 1.0);

  // An empty buffer has no d[0]; bail out before the unconditional access
  // to the first sample below.
  if (n <= 0) {
    return;
  }

  // Go from the last sample to the first so that d[i - 1] is still the
  // unfiltered value when it is read.
  for (int32_t i = n - 1; i > 0; --i) {
    d[i] -= preemph_coeff * d[i - 1];
  }
  // The first sample has no predecessor, so it is used as its own.
  d[0] -= preemph_coeff * d[0];
}
// Runs the per-frame processing chain on the first opts.WindowSize() samples
// of `window`, in place and in this order:
//   1. dithering            (when opts.dither != 0)
//   2. DC offset removal    (when opts.remove_dc_offset)
//   3. log-energy output    (when `log_energy_pre_window` is non-null): the
//      natural log of the frame's sum of squares, floored at float epsilon,
//      measured before pre-emphasis and windowing
//   4. pre-emphasis         (when opts.preemph_coeff != 0)
//   5. multiplication by `window_function`
void ProcessWindow(const FrameExtractionOptions &opts,
                   const FeatureWindowFunction &window_function, float *window,
                   float *log_energy_pre_window) {
  const int32_t num_samples = opts.WindowSize();

  if (opts.dither != 0.0) Dither(window, num_samples, opts.dither);

  if (opts.remove_dc_offset) RemoveDcOffset(window, num_samples);

  if (log_energy_pre_window != nullptr) {
    const float raw_energy = InnerProduct(window, window, num_samples);
    const float energy_floor = std::numeric_limits<float>::epsilon();
    // Floor the energy so the logarithm never sees zero.
    const float energy = (raw_energy < energy_floor) ? energy_floor : raw_energy;
    *log_energy_pre_window = std::log(energy);
  }

  if (opts.preemph_coeff != 0.0) {
    Preemphasize(window, num_samples, opts.preemph_coeff);
  }

  window_function.Apply(window);
}
}