/// Multiplies every sample in `buf` by `gain`, in place.
///
/// Plain scalar implementation; an empty slice is left untouched.
#[inline]
pub fn gain_block_scalar(buf: &mut [f32], gain: f32) {
    buf.iter_mut().for_each(|sample| *sample *= gain);
}
/// Multiplies every sample in `buf` by `gain`, in place.
///
/// With the `wide-backend` feature enabled, the buffer is processed in groups
/// of eight samples through `wide::f32x8`, and the leftover samples (fewer
/// than eight) are handed to [`gain_block_scalar`]. Without the feature the
/// whole buffer goes through [`gain_block_scalar`].
#[inline]
pub fn gain_block(buf: &mut [f32], gain: f32) {
    #[cfg(feature = "wide-backend")]
    {
        use wide::f32x8;
        let factor = f32x8::splat(gain);
        let mut blocks = buf.chunks_exact_mut(8);
        for block in blocks.by_ref() {
            // `chunks_exact_mut(8)` only yields 8-element slices, so the
            // conversion always succeeds and the default is never used.
            let loaded = f32x8::from(<[f32; 8]>::try_from(&block[..]).unwrap_or_default());
            block.copy_from_slice((loaded * factor).as_array_ref());
        }
        // Whatever did not fill a full group of eight is processed one by one.
        gain_block_scalar(blocks.into_remainder(), gain);
    }
    #[cfg(not(feature = "wide-backend"))]
    gain_block_scalar(buf, gain);
}
/// Writes `src[i] * scale` into `out[i]` for every index both slices share.
///
/// Only the first `min(out.len(), src.len())` elements are processed; any
/// extra elements of `out` keep their previous values.
#[inline]
pub fn scale_block_scalar(out: &mut [f32], src: &[f32], scale: f32) {
    // `zip` stops at the shorter slice, which gives the common-prefix
    // behaviour without per-element index checks.
    for (dst, &sample) in out.iter_mut().zip(src) {
        *dst = sample * scale;
    }
}
/// Writes `src[i] * scale` into `out[i]` for every index both slices share.
///
/// Only the first `min(out.len(), src.len())` elements are processed. With
/// the `wide-backend` feature enabled, full groups of eight samples go
/// through `wide::f32x8` and the leftover samples are handled by
/// [`scale_block_scalar`]; without it the call is forwarded to
/// [`scale_block_scalar`] directly.
#[inline]
pub fn scale_block(out: &mut [f32], src: &[f32], scale: f32) {
    #[cfg(feature = "wide-backend")]
    {
        use wide::f32x8;
        let len = out.len().min(src.len());
        let factor = f32x8::splat(scale);
        let src_blocks = src[..len].chunks_exact(8);
        let src_rest = src_blocks.remainder();
        let mut out_blocks = out[..len].chunks_exact_mut(8);
        for (dst, lane) in out_blocks.by_ref().zip(src_blocks) {
            // Exact chunks are always 8 long, so the fallback default is
            // never actually produced.
            let loaded = f32x8::from(<[f32; 8]>::try_from(lane).unwrap_or_default());
            dst.copy_from_slice((loaded * factor).as_array_ref());
        }
        // Fewer than eight samples remain on both sides; finish them one by one.
        scale_block_scalar(out_blocks.into_remainder(), src_rest, scale);
    }
    #[cfg(not(feature = "wide-backend"))]
    scale_block_scalar(out, src, scale);
}
/// Writes the element-wise product `a[i] * b[i]` into `out[i]`.
///
/// Only the first `min(out.len(), a.len(), b.len())` elements are processed;
/// any extra elements of `out` keep their previous values.
#[inline]
pub fn mul_block_scalar(out: &mut [f32], a: &[f32], b: &[f32]) {
    // Zipping all three slices stops at the shortest one and avoids
    // per-element index checks.
    for ((dst, &x), &y) in out.iter_mut().zip(a).zip(b) {
        *dst = x * y;
    }
}
/// Writes the element-wise product `a[i] * b[i]` into `out[i]`.
///
/// Only the first `min(out.len(), a.len(), b.len())` elements are processed.
/// With the `wide-backend` feature enabled, full groups of eight samples go
/// through `wide::f32x8` and the leftover samples are handled by
/// [`mul_block_scalar`]; without it the call is forwarded to
/// [`mul_block_scalar`] directly.
#[inline]
pub fn mul_block(out: &mut [f32], a: &[f32], b: &[f32]) {
    #[cfg(feature = "wide-backend")]
    {
        use wide::f32x8;
        let len = out.len().min(a.len()).min(b.len());
        let a_blocks = a[..len].chunks_exact(8);
        let b_blocks = b[..len].chunks_exact(8);
        let a_rest = a_blocks.remainder();
        let b_rest = b_blocks.remainder();
        let mut out_blocks = out[..len].chunks_exact_mut(8);
        for ((dst, lhs), rhs) in out_blocks.by_ref().zip(a_blocks).zip(b_blocks) {
            // Exact chunks are always 8 long, so neither conversion can fall
            // back to the default value.
            let left = f32x8::from(<[f32; 8]>::try_from(lhs).unwrap_or_default());
            let right = f32x8::from(<[f32; 8]>::try_from(rhs).unwrap_or_default());
            let product = left * right;
            dst.copy_from_slice(product.as_array_ref());
        }
        // Fewer than eight samples remain; finish them one by one.
        mul_block_scalar(out_blocks.into_remainder(), a_rest, b_rest);
    }
    #[cfg(not(feature = "wide-backend"))]
    mul_block_scalar(out, a, b);
}
/// Multiply-accumulate: adds `src[i] * scale` onto `out[i]`.
///
/// Only the first `min(out.len(), src.len())` elements are processed; any
/// extra elements of `out` keep their previous values.
#[inline]
pub fn mac_block_scalar(out: &mut [f32], src: &[f32], scale: f32) {
    // `zip` stops at the shorter slice and avoids per-element index checks.
    for (acc, &sample) in out.iter_mut().zip(src) {
        *acc += sample * scale;
    }
}
/// Multiply-accumulate: adds `src[i] * scale` onto `out[i]`.
///
/// Only the first `min(out.len(), src.len())` elements are processed. With
/// the `wide-backend` feature enabled, full groups of eight samples go
/// through `wide::f32x8` and the leftover samples are handled by
/// [`mac_block_scalar`]; without it the call is forwarded to
/// [`mac_block_scalar`] directly.
#[inline]
pub fn mac_block(out: &mut [f32], src: &[f32], scale: f32) {
    #[cfg(feature = "wide-backend")]
    {
        use wide::f32x8;
        let len = out.len().min(src.len());
        let factor = f32x8::splat(scale);
        let src_blocks = src[..len].chunks_exact(8);
        let src_rest = src_blocks.remainder();
        let mut out_blocks = out[..len].chunks_exact_mut(8);
        for (acc, lane) in out_blocks.by_ref().zip(src_blocks) {
            // Exact chunks are always 8 long, so neither conversion can fall
            // back to the default value.
            let current = f32x8::from(<[f32; 8]>::try_from(&acc[..]).unwrap_or_default());
            let input = f32x8::from(<[f32; 8]>::try_from(lane).unwrap_or_default());
            // Separate multiply then add, the same operation order as the
            // scalar version.
            let updated = current + input * factor;
            acc.copy_from_slice(updated.as_array_ref());
        }
        // Fewer than eight samples remain; finish them one by one.
        mac_block_scalar(out_blocks.into_remainder(), src_rest, scale);
    }
    #[cfg(not(feature = "wide-backend"))]
    mac_block_scalar(out, src, scale);
}
/// Writes the weighted sum `a[i] * gain_a + b[i] * gain_b` into `out[i]`.
///
/// Only the first `min(out.len(), a.len(), b.len())` elements are processed;
/// any extra elements of `out` keep their previous values.
#[inline]
pub fn mix_block_scalar(out: &mut [f32], a: &[f32], gain_a: f32, b: &[f32], gain_b: f32) {
    // Zipping all three slices stops at the shortest one and avoids
    // per-element index checks.
    for ((dst, &x), &y) in out.iter_mut().zip(a).zip(b) {
        *dst = x * gain_a + y * gain_b;
    }
}
/// Writes the weighted sum `a[i] * gain_a + b[i] * gain_b` into `out[i]`.
///
/// Only the first `min(out.len(), a.len(), b.len())` elements are processed.
/// With the `wide-backend` feature enabled, full groups of eight samples go
/// through `wide::f32x8` and the leftover samples are handled by
/// [`mix_block_scalar`]; without it the call is forwarded to
/// [`mix_block_scalar`] directly.
#[inline]
pub fn mix_block(out: &mut [f32], a: &[f32], gain_a: f32, b: &[f32], gain_b: f32) {
    #[cfg(feature = "wide-backend")]
    {
        use wide::f32x8;
        let len = out.len().min(a.len()).min(b.len());
        let weight_a = f32x8::splat(gain_a);
        let weight_b = f32x8::splat(gain_b);
        let a_blocks = a[..len].chunks_exact(8);
        let b_blocks = b[..len].chunks_exact(8);
        let a_rest = a_blocks.remainder();
        let b_rest = b_blocks.remainder();
        let mut out_blocks = out[..len].chunks_exact_mut(8);
        for ((dst, lhs), rhs) in out_blocks.by_ref().zip(a_blocks).zip(b_blocks) {
            // Exact chunks are always 8 long, so neither conversion can fall
            // back to the default value.
            let left = f32x8::from(<[f32; 8]>::try_from(lhs).unwrap_or_default());
            let right = f32x8::from(<[f32; 8]>::try_from(rhs).unwrap_or_default());
            let mixed = left * weight_a + right * weight_b;
            dst.copy_from_slice(mixed.as_array_ref());
        }
        // Fewer than eight samples remain; finish them one by one.
        mix_block_scalar(out_blocks.into_remainder(), a_rest, gain_a, b_rest, gain_b);
    }
    #[cfg(not(feature = "wide-backend"))]
    mix_block_scalar(out, a, gain_a, b, gain_b);
}
/// Copies samples from `src` into the front of `out`.
///
/// Only the first `min(out.len(), src.len())` elements are copied; any extra
/// elements of `out` keep their previous values.
#[inline]
pub fn copy_block(out: &mut [f32], src: &[f32]) {
    let shared = out.len().min(src.len());
    // Trim both sides to the common length first; `copy_from_slice` requires
    // equal lengths.
    let (dst, from) = (&mut out[..shared], &src[..shared]);
    dst.copy_from_slice(from);
}
/// Overwrites every sample in `buf` with `0.0`.
#[inline]
pub fn zero_block(buf: &mut [f32]) {
    for sample in buf.iter_mut() {
        *sample = 0.0;
    }
}
/// Returns the largest absolute value found in `buf`.
///
/// An empty slice yields `0.0`. If any sample is NaN the scan stops at that
/// sample and NaN is returned.
#[inline]
#[must_use]
pub fn abs_max_block(buf: &[f32]) -> f32 {
    buf.iter()
        // `None` aborts the fold at the first NaN sample.
        .try_fold(0.0_f32, |peak, &sample| {
            if sample.is_nan() {
                None
            } else {
                Some(peak.max(sample.abs()))
            }
        })
        .unwrap_or(f32::NAN)
}
#[cfg(test)]
mod tests {
    // Exact float comparison is deliberate: the dispatching functions are
    // expected to produce the same values as their scalar counterparts. The
    // `as f32` casts only convert small loop indices.
    #![allow(clippy::float_cmp, clippy::cast_precision_loss)]
    use super::*;

    #[test]
    fn gain_block_matches_scalar() {
        for n in [0, 1, 7, 8, 9, 16, 31, 32, 33, 128] {
            let init: Vec<f32> = (0..n).map(|i| i as f32 * 0.5 - 1.0).collect();
            let mut a = init.clone();
            let mut b = init.clone();
            gain_block_scalar(&mut a, 0.75);
            gain_block(&mut b, 0.75);
            assert_eq!(a, b, "mismatch at n={n}");
        }
    }

    #[test]
    fn scale_block_matches_scalar() {
        for n in [0, 1, 7, 8, 9, 16, 33] {
            let src: Vec<f32> = (0..n).map(|i| i as f32 * 0.3 - 1.0).collect();
            let mut out_s = vec![0.0; n];
            let mut out_v = vec![0.0; n];
            scale_block_scalar(&mut out_s, &src, 0.5);
            scale_block(&mut out_v, &src, 0.5);
            assert_eq!(out_s, out_v, "mismatch at n={n}");
        }
    }

    #[test]
    fn mul_block_matches_scalar() {
        for n in [0, 1, 7, 8, 33] {
            let a: Vec<f32> = (0..n).map(|i| i as f32 * 0.1).collect();
            let b: Vec<f32> = (0..n).map(|i| i as f32 * -0.2).collect();
            let mut out_s = vec![0.0; n];
            let mut out_v = vec![0.0; n];
            mul_block_scalar(&mut out_s, &a, &b);
            mul_block(&mut out_v, &a, &b);
            assert_eq!(out_s, out_v, "mismatch at n={n}");
        }
    }

    #[test]
    fn mac_block_matches_scalar() {
        for n in [0, 7, 16, 65] {
            let src: Vec<f32> = (0..n).map(|i| i as f32).collect();
            let mut a = vec![1.0; n];
            let mut b = vec![1.0; n];
            mac_block_scalar(&mut a, &src, 0.25);
            mac_block(&mut b, &src, 0.25);
            assert_eq!(a, b, "mismatch at n={n}");
        }
    }

    #[test]
    fn mix_block_matches_scalar() {
        // Lengths on both sides of a multiple of eight, so the chunked path
        // and the leftover path are both exercised.
        for n in [0, 1, 7, 8, 9, 31, 32, 33] {
            let a: Vec<f32> = (0..n).map(|i| i as f32).collect();
            let b: Vec<f32> = (0..n).map(|i| (i as f32) * 2.0).collect();
            let mut out_s = vec![0.0; n];
            let mut out_v = vec![0.0; n];
            mix_block_scalar(&mut out_s, &a, 0.5, &b, 0.25);
            mix_block(&mut out_v, &a, 0.5, &b, 0.25);
            assert_eq!(out_s, out_v, "mismatch at n={n}");
        }
    }

    #[test]
    fn mismatched_lengths_touch_only_common_prefix() {
        let src: Vec<f32> = (0..12).map(|i| i as f32).collect();

        // Output shorter than the source: every output element is written.
        let mut out = vec![-1.0_f32; 10];
        scale_block(&mut out, &src, 2.0);
        let expected: Vec<f32> = (0..10).map(|i| i as f32 * 2.0).collect();
        assert_eq!(out, expected);

        // Output longer than the source: the extra elements stay as they were.
        let mut acc = vec![1.0_f32; 12];
        mac_block(&mut acc, &src[..9], 2.0);
        let expected_acc: Vec<f32> = (0..12)
            .map(|i| if i < 9 { 1.0 + i as f32 * 2.0 } else { 1.0 })
            .collect();
        assert_eq!(acc, expected_acc);
    }

    #[test]
    fn copy_block_copies_common_prefix() {
        let mut out = [0.0_f32; 4];
        copy_block(&mut out, &[1.0, 2.0]);
        assert_eq!(out, [1.0, 2.0, 0.0, 0.0]);

        let mut short = [0.0_f32; 2];
        copy_block(&mut short, &[5.0, 6.0, 7.0]);
        assert_eq!(short, [5.0, 6.0]);
    }

    #[test]
    fn zero_block_clears_buffer() {
        let mut buf = [1.0_f32, -2.0, 3.5];
        zero_block(&mut buf);
        assert_eq!(buf, [0.0, 0.0, 0.0]);
    }

    #[test]
    fn abs_max_block_finds_peak() {
        let buf = [-0.1, 0.7, -0.9, 0.3];
        assert!((abs_max_block(&buf) - 0.9).abs() < 1e-6);
        assert_eq!(abs_max_block(&[]), 0.0);
        assert!(abs_max_block(&[1.0, f32::NAN, 2.0]).is_nan());
    }
}