use core::{f32::consts::PI, mem::MaybeUninit};
#[cfg(target_arch = "aarch64")]
use core::arch::aarch64::{
float32x4_t, vbslq_f32, vcgtq_f32, vcvtq_f32_u32, vdupq_n_f32, vfmaq_f32, vld1q_u32, vmlaq_f32,
vmulq_f32, vmulq_n_f32, vnegq_f32, vst1q_f32, vsubq_f32,
};
use derive_more::{Display, IsVariant};
use crate::error::{Error, Result};
/// Window shapes that [`symmetric_window`] and [`symmetric_window_scalar`] can generate.
///
/// In the formulas below `k` is the sample index (`0..n`) and `n` is the window length.
/// `Display` prints the lowercase name returned by [`SymWindowKind::as_str`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Display, IsVariant)]
#[display("{}", self.as_str())]
pub enum SymWindowKind {
/// `0.5 * (1 - cos(2*pi*k / (n - 1)))`.
Hann,
/// `0.54 - 0.46 * cos(2*pi*k / (n - 1))`.
Hamming,
/// `0.42 - 0.5 * cos(theta) + 0.08 * cos(2 * theta)` with `theta = 2*pi*k / (n - 1)`.
Blackman,
/// Triangle: `1 - 2 * |k - (n - 1) / 2| / (n - 1)`.
Bartlett,
}
impl SymWindowKind {
pub const fn as_str(&self) -> &'static str {
match self {
Self::Hann => "hann",
Self::Hamming => "hamming",
Self::Blackman => "blackman",
Self::Bartlett => "bartlett",
}
}
}
/// Window shapes that [`kaldi_window`] and [`kaldi_window_scalar`] can generate.
///
/// In the formulas below `k` is the sample index (`0..n`) and `n` is the window length.
/// `Display` prints the lowercase name returned by [`KaldiWindowKind::as_str`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Display, IsVariant)]
#[display("{}", self.as_str())]
pub enum KaldiWindowKind {
/// `0.54 - 0.46 * cos(2*pi*k / (n - 1))`.
Hamming,
/// `0.5 - 0.5 * cos(2*pi*k / (n - 1))`.
Hanning,
/// Every sample is `1.0`.
Rectangular,
}
impl KaldiWindowKind {
pub const fn as_str(&self) -> &'static str {
match self {
Self::Hamming => "hamming",
Self::Hanning => "hanning",
Self::Rectangular => "rectangular",
}
}
}
#[inline]
#[doc(hidden)]
pub fn symmetric_window_scalar(kind: SymWindowKind, n: usize) -> Result<Vec<f32>> {
assert!(n >= 2, "symmetric_window_scalar: n must be >= 2 (got {n})");
let denom = (n - 1) as f32;
let mut out: Vec<f32> = Vec::new();
out.try_reserve_exact(n).map_err(|_| Error::OutOfMemory)?;
match kind {
SymWindowKind::Hann => {
for k in 0..n {
let theta = 2.0 * PI * (k as f32) / denom;
out.push(0.5 * (1.0 - theta.cos()));
}
}
SymWindowKind::Hamming => {
for k in 0..n {
let theta = 2.0 * PI * (k as f32) / denom;
out.push(0.54 - 0.46 * theta.cos());
}
}
SymWindowKind::Blackman => {
for k in 0..n {
let theta = 2.0 * PI * (k as f32) / denom;
out.push(0.42 - 0.5 * theta.cos() + 0.08 * (2.0 * theta).cos());
}
}
SymWindowKind::Bartlett => {
for k in 0..n {
out.push(1.0 - 2.0 * (k as f32 - denom / 2.0).abs() / denom);
}
}
}
Ok(out)
}
#[inline]
#[doc(hidden)]
pub fn kaldi_window_scalar(kind: KaldiWindowKind, n: usize) -> Result<Vec<f32>> {
assert!(n >= 2, "kaldi_window_scalar: n must be >= 2 (got {n})");
let denom = (n - 1) as f32;
let mut out: Vec<f32> = Vec::new();
out.try_reserve_exact(n).map_err(|_| Error::OutOfMemory)?;
match kind {
KaldiWindowKind::Hamming => {
for k in 0..n {
let theta = 2.0 * PI * (k as f32) / denom;
out.push(0.54 - 0.46 * theta.cos());
}
}
KaldiWindowKind::Hanning => {
for k in 0..n {
let theta = 2.0 * PI * (k as f32) / denom;
out.push(0.5 - 0.5 * theta.cos());
}
}
KaldiWindowKind::Rectangular => {
out.resize(n, 1.0);
}
}
Ok(out)
}
// `f32` multiples of pi used by the aarch64 NEON code paths in this file.
/// pi, re-exported under a local name.
#[cfg(target_arch = "aarch64")]
const PI_F: f32 = PI;
/// 2 * pi (one full period of the cosine).
#[cfg(target_arch = "aarch64")]
const TWO_PI: f32 = 2.0 * PI;
/// pi / 2.
#[cfg(target_arch = "aarch64")]
const PI_HALF: f32 = 0.5 * PI;
/// Approximates `cos` on four `f32` lanes at once.
///
/// The argument is folded once around pi (`x -> 2*pi - x`) and once around pi/2
/// (`x -> pi - x`, with the result negated), then an even polynomial of degree 12 with
/// Taylor coefficients is evaluated in Horner form. Only those two folds are applied, so the
/// input is expected to lie in (or marginally outside) `[0, 2*pi]`.
///
/// # Safety
///
/// The CPU must support NEON.
#[cfg(target_arch = "aarch64")]
#[inline]
#[target_feature(enable = "neon")]
unsafe fn cos_neon_4(x: float32x4_t) -> float32x4_t {
    // cos(x) == cos(2*pi - x): map (pi, 2*pi] back onto [0, pi).
    let above_pi = vcgtq_f32(x, vdupq_n_f32(PI_F));
    let mirrored = vbslq_f32(above_pi, vsubq_f32(vdupq_n_f32(TWO_PI), x), x);

    // cos(x) == -cos(pi - x): map (pi/2, pi] onto [0, pi/2) and remember to flip the sign.
    let flip_sign = vcgtq_f32(mirrored, vdupq_n_f32(PI_HALF));
    let reduced = vbslq_f32(flip_sign, vsubq_f32(vdupq_n_f32(PI_F), mirrored), mirrored);

    // Horner evaluation in `reduced^2`, highest-order coefficient first.
    let r2 = vmulq_f32(reduced, reduced);
    let mut acc = vdupq_n_f32(1.0 / 479_001_600.0);
    for coeff in [
        -1.0 / 3_628_800.0,
        1.0 / 40320.0,
        -1.0 / 720.0,
        1.0 / 24.0,
        -0.5,
        1.0,
    ] {
        acc = vfmaq_f32(vdupq_n_f32(coeff), acc, r2);
    }

    vbslq_f32(flip_sign, vnegq_f32(acc), acc)
}
/// Builds the four angles for sample indices `k_base .. k_base + 4`.
///
/// Each lane holds `(k_base + lane) as f32 * inv_denom_times_2pi`.
///
/// # Safety
///
/// The CPU must support NEON.
#[cfg(target_arch = "aarch64")]
#[inline]
#[target_feature(enable = "neon")]
unsafe fn theta_neon_4(k_base: u32, inv_denom_times_2pi: f32) -> float32x4_t {
    let indices: [u32; 4] = [0, 1, 2, 3].map(|lane| k_base + lane);
    // SAFETY: `indices` is a live array of exactly four `u32`, which is what `vld1q_u32` reads.
    unsafe {
        let as_float = vcvtq_f32_u32(vld1q_u32(indices.as_ptr()));
        vmulq_n_f32(as_float, inv_denom_times_2pi)
    }
}
/// Scalar value of sample `k` of a symmetric window, where `denom` is `n - 1` as `f32`.
///
/// Used by the NEON path for the samples it does not vectorise.
#[cfg(target_arch = "aarch64")]
#[inline]
fn symmetric_window_sample(kind: SymWindowKind, k: usize, denom: f32) -> f32 {
    let pos = k as f32;
    let phase = 2.0 * PI * pos / denom;
    match kind {
        SymWindowKind::Bartlett => 1.0 - 2.0 * (pos - denom / 2.0).abs() / denom,
        SymWindowKind::Hann => 0.5 * (1.0 - phase.cos()),
        SymWindowKind::Hamming => 0.54 - 0.46 * phase.cos(),
        SymWindowKind::Blackman => {
            let fundamental = phase.cos();
            let second_harmonic = (2.0 * phase).cos();
            0.42 - 0.5 * fundamental + 0.08 * second_harmonic
        }
    }
}
/// Fills `out` with the `n`-point symmetric window of the given `kind` using NEON.
///
/// Bartlett is computed entirely with the scalar formula. For the cosine-based kinds the
/// samples are produced four at a time through [`theta_neon_4`] and [`cos_neon_4`]; the last
/// `n % 4` samples, and any samples whose index does not fit in `u32`, are computed by
/// [`symmetric_window_sample`]. Every element of `out` is initialised when this returns.
///
/// # Panics
///
/// Panics if `out.len() != n` or if `n < 2`.
///
/// # Safety
///
/// The CPU must support NEON.
#[cfg(target_arch = "aarch64")]
#[inline]
#[target_feature(enable = "neon")]
unsafe fn symmetric_window_neon(kind: SymWindowKind, out: &mut [MaybeUninit<f32>], n: usize) {
    // Lane indices are materialised as `u32`, so the vector body must stop at the largest
    // multiple of four for which `i as u32` and `k_base + 3` cannot wrap.
    const SIMD_INDEX_CAP: usize = (u32::MAX as usize) & !3;

    assert_eq!(
        out.len(),
        n,
        "symmetric_window_neon: out.len() ({}) must equal n ({})",
        out.len(),
        n,
    );
    assert!(n >= 2, "symmetric_window_neon: n must be >= 2 (got {n})");
    let denom = (n - 1) as f32;
    if matches!(kind, SymWindowKind::Bartlett) {
        // No cosine involved, so the plain scalar formula is used for every sample.
        for (k, slot) in out.iter_mut().enumerate() {
            slot.write(symmetric_window_sample(kind, k, denom));
        }
        return;
    }
    let inv_denom_times_2pi = 2.0 * PI_F / denom;
    let body_len = (n - (n % 4)).min(SIMD_INDEX_CAP);
    // SAFETY: NEON is enabled for this function, which is all `theta_neon_4` and `cos_neon_4`
    // require. `MaybeUninit<f32>` has the same layout as `f32`, and every store covers
    // `i .. i + 4` with `i + 4 <= body_len <= n == out.len()`, so it stays inside `out`.
    unsafe {
        let dst_base = out.as_mut_ptr().cast::<f32>();
        let mut i = 0usize;
        while i + 4 <= body_len {
            // Lossless: `i + 3 < body_len <= SIMD_INDEX_CAP <= u32::MAX`.
            let theta = theta_neon_4(i as u32, inv_denom_times_2pi);
            let cos_theta = cos_neon_4(theta);
            let w = match kind {
                SymWindowKind::Hann => {
                    vmulq_n_f32(vsubq_f32(vdupq_n_f32(1.0), cos_theta), 0.5)
                }
                SymWindowKind::Hamming => {
                    vmlaq_f32(vdupq_n_f32(0.54), cos_theta, vdupq_n_f32(-0.46))
                }
                SymWindowKind::Blackman => {
                    // 2*theta spans up to about 4*pi; subtract one period where needed so
                    // the argument is back in the range `cos_neon_4` folds.
                    let two_theta = vmulq_n_f32(theta, 2.0);
                    let two_pi_v = vdupq_n_f32(TWO_PI);
                    let gt_2pi = vcgtq_f32(two_theta, two_pi_v);
                    let two_theta_folded =
                        vbslq_f32(gt_2pi, vsubq_f32(two_theta, two_pi_v), two_theta);
                    let cos_2theta = cos_neon_4(two_theta_folded);
                    let term1 = vmlaq_f32(vdupq_n_f32(0.42), cos_theta, vdupq_n_f32(-0.5));
                    vmlaq_f32(term1, cos_2theta, vdupq_n_f32(0.08))
                }
                SymWindowKind::Bartlett => unreachable!(),
            };
            vst1q_f32(dst_base.add(i), w);
            i += 4;
        }
    }
    // Scalar tail: everything the vector body did not cover.
    for (k, slot) in out.iter_mut().enumerate().skip(body_len) {
        slot.write(symmetric_window_sample(kind, k, denom));
    }
}
/// Scalar value of sample `k` of a [`KaldiWindowKind`] window, where `denom` is `n - 1` as `f32`.
///
/// Used by the NEON path for the samples it does not vectorise.
#[cfg(target_arch = "aarch64")]
#[inline]
fn kaldi_window_sample(kind: KaldiWindowKind, k: usize, denom: f32) -> f32 {
    let phase = 2.0 * PI * (k as f32) / denom;
    let cosine = phase.cos();
    match kind {
        KaldiWindowKind::Rectangular => 1.0,
        KaldiWindowKind::Hanning => 0.5 - 0.5 * cosine,
        KaldiWindowKind::Hamming => 0.54 - 0.46 * cosine,
    }
}
/// Fills `out` with the `n`-point window of the given [`KaldiWindowKind`] using NEON.
///
/// The rectangular kind is written as plain ones. For the cosine-based kinds the samples are
/// produced four at a time through [`theta_neon_4`] and [`cos_neon_4`]; the last `n % 4`
/// samples, and any samples whose index does not fit in `u32`, are computed by
/// [`kaldi_window_sample`]. Every element of `out` is initialised when this returns.
///
/// # Panics
///
/// Panics if `out.len() != n` or if `n < 2`.
///
/// # Safety
///
/// The CPU must support NEON.
#[cfg(target_arch = "aarch64")]
#[inline]
#[target_feature(enable = "neon")]
unsafe fn kaldi_window_neon(kind: KaldiWindowKind, out: &mut [MaybeUninit<f32>], n: usize) {
    // Lane indices are materialised as `u32`, so the vector body must stop at the largest
    // multiple of four for which `i as u32` and `k_base + 3` cannot wrap.
    const SIMD_INDEX_CAP: usize = (u32::MAX as usize) & !3;

    assert_eq!(
        out.len(),
        n,
        "kaldi_window_neon: out.len() ({}) must equal n ({})",
        out.len(),
        n,
    );
    assert!(n >= 2, "kaldi_window_neon: n must be >= 2 (got {n})");
    if matches!(kind, KaldiWindowKind::Rectangular) {
        for slot in out.iter_mut() {
            slot.write(1.0);
        }
        return;
    }
    let denom = (n - 1) as f32;
    let inv_denom_times_2pi = 2.0 * PI_F / denom;
    let body_len = (n - (n % 4)).min(SIMD_INDEX_CAP);
    // SAFETY: NEON is enabled for this function, which is all `theta_neon_4` and `cos_neon_4`
    // require. `MaybeUninit<f32>` has the same layout as `f32`, and every store covers
    // `i .. i + 4` with `i + 4 <= body_len <= n == out.len()`, so it stays inside `out`.
    unsafe {
        let dst_base = out.as_mut_ptr().cast::<f32>();
        let mut i = 0usize;
        while i + 4 <= body_len {
            // Lossless: `i + 3 < body_len <= SIMD_INDEX_CAP <= u32::MAX`.
            let theta = theta_neon_4(i as u32, inv_denom_times_2pi);
            let cos_theta = cos_neon_4(theta);
            let w = match kind {
                KaldiWindowKind::Hamming => {
                    vmlaq_f32(vdupq_n_f32(0.54), cos_theta, vdupq_n_f32(-0.46))
                }
                KaldiWindowKind::Hanning => {
                    vmulq_n_f32(vsubq_f32(vdupq_n_f32(1.0), cos_theta), 0.5)
                }
                KaldiWindowKind::Rectangular => unreachable!(),
            };
            vst1q_f32(dst_base.add(i), w);
            i += 4;
        }
    }
    // Scalar tail: everything the vector body did not cover.
    for (k, slot) in out.iter_mut().enumerate().skip(body_len) {
        slot.write(kaldi_window_sample(kind, k, denom));
    }
}
/// Builds an `n`-point symmetric window of the given `kind`.
///
/// On aarch64, when `crate::simd::is_neon_available()` returns `true`, the samples are
/// produced by [`symmetric_window_neon`]; in every other case the work is delegated to
/// [`symmetric_window_scalar`].
///
/// # Errors
///
/// Returns [`Error::OutOfMemory`] when the output buffer cannot be reserved.
///
/// # Panics
///
/// Panics if `n < 2`.
pub fn symmetric_window(kind: SymWindowKind, n: usize) -> Result<Vec<f32>> {
    assert!(
        n >= 2,
        "simd::audio::window::symmetric_window: n must be >= 2 (got {n})"
    );

    #[cfg(target_arch = "aarch64")]
    {
        if crate::simd::is_neon_available() {
            let mut window = Vec::<f32>::new();
            if window.try_reserve_exact(n).is_err() {
                return Err(Error::OutOfMemory);
            }
            let uninit: &mut [MaybeUninit<f32>] = &mut window.spare_capacity_mut()[..n];
            // SAFETY: the NEON kernel is only entered after the availability check above
            // (presumably a runtime NEON probe; confirm in `crate::simd`).
            unsafe { symmetric_window_neon(kind, uninit, n) };
            // SAFETY: capacity is at least `n` after the reservation, and the kernel wrote
            // all `n` leading slots.
            unsafe { window.set_len(n) };
            return Ok(window);
        }
    }

    symmetric_window_scalar(kind, n)
}
/// Builds an `n`-point window of the given [`KaldiWindowKind`].
///
/// On aarch64, when `crate::simd::is_neon_available()` returns `true`, the samples are
/// produced by [`kaldi_window_neon`]; in every other case the work is delegated to
/// [`kaldi_window_scalar`].
///
/// # Errors
///
/// Returns [`Error::OutOfMemory`] when the output buffer cannot be reserved.
///
/// # Panics
///
/// Panics if `n < 2`.
pub fn kaldi_window(kind: KaldiWindowKind, n: usize) -> Result<Vec<f32>> {
    assert!(
        n >= 2,
        "simd::audio::window::kaldi_window: n must be >= 2 (got {n})"
    );

    #[cfg(target_arch = "aarch64")]
    {
        if crate::simd::is_neon_available() {
            let mut window = Vec::<f32>::new();
            if window.try_reserve_exact(n).is_err() {
                return Err(Error::OutOfMemory);
            }
            let uninit: &mut [MaybeUninit<f32>] = &mut window.spare_capacity_mut()[..n];
            // SAFETY: the NEON kernel is only entered after the availability check above
            // (presumably a runtime NEON probe; confirm in `crate::simd`).
            unsafe { kaldi_window_neon(kind, uninit, n) };
            // SAFETY: capacity is at least `n` after the reservation, and the kernel wrote
            // all `n` leading slots.
            unsafe { window.set_len(n) };
            return Ok(window);
        }
    }

    kaldi_window_scalar(kind, n)
}
// Unit tests live in the out-of-line `tests` submodule and are compiled only for test builds.
#[cfg(test)]
mod tests;