#[cfg(target_arch = "x86_64")]
use super::{approx_eq_simd_f64, naive_dft};
#[cfg(target_arch = "x86_64")]
use crate::kernel::Complex;
/// Mixed tolerance for `Complex<f32>`: a component matches when its absolute
/// difference is within `1e-4`, or within `1e-4` times the largest component
/// magnitude of either operand (the relative floor).
#[cfg(target_arch = "x86_64")]
fn approx_eq_simd_f32(a: Complex<f32>, b: Complex<f32>) -> bool {
let abs_diff_re = (a.re - b.re).abs();
let abs_diff_im = (a.im - b.im).abs();
let rel_floor = 1e-4_f32 * a.re.abs().max(b.re.abs()).max(a.im.abs()).max(b.im.abs());
(abs_diff_re <= 1e-4_f32 || abs_diff_re <= rel_floor)
&& (abs_diff_im <= 1e-4_f32 || abs_diff_im <= rel_floor)
}
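// Self-test for the comparator above (a minimal hedged sketch): exercise the
// absolute branch, the relative branch, and a clear mismatch, with no codelet
// involved.
#[cfg(target_arch = "x86_64")]
#[test]
fn approx_eq_simd_f32_tolerance_branches() {
    // Near zero: the absolute 1e-4 branch applies.
    assert!(approx_eq_simd_f32(
        Complex::new(0.0_f32, 0.0),
        Complex::new(5e-5, -5e-5)
    ));
    // Large magnitudes: the relative floor (1e-4 * ~1e4 = ~1.0) absorbs a 0.5 gap.
    assert!(approx_eq_simd_f32(
        Complex::new(1.0e4_f32, 0.0),
        Complex::new(1.0e4 + 0.5, 0.0)
    ));
    // A 10% gap at unit magnitude must be rejected.
    assert!(!approx_eq_simd_f32(
        Complex::new(1.0_f32, 0.0),
        Complex::new(1.1, 0.0)
    ));
}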
/// O(n²) reference DFT over `Complex<f32>`; `sign = -1` gives the forward
/// transform and `sign = 1` the (unscaled) inverse.
#[cfg(target_arch = "x86_64")]
fn naive_dft_f32(x: &[Complex<f32>], sign: i32) -> Vec<Complex<f32>> {
let n = x.len();
(0..n)
.map(|k| {
x.iter()
.enumerate()
.fold(Complex::new(0.0_f32, 0.0), |acc, (j, &xj)| {
let angle =
sign as f32 * 2.0 * core::f32::consts::PI * (k * j) as f32 / n as f32;
acc + xj * Complex::new(angle.cos(), angle.sin())
})
})
.collect()
}
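// Cross-check for the f32 reference DFT (a hedged sketch): for n = 2 the
// forward transform reduces to [x0 + x1, x0 - x1], verifiable by hand.
#[cfg(target_arch = "x86_64")]
#[test]
fn naive_dft_f32_size2_matches_hand_computation() {
    let x = vec![Complex::new(1.0_f32, 2.0), Complex::new(3.0, -1.0)];
    let y = naive_dft_f32(&x, -1);
    // x0 + x1 = (4, 1); x0 - x1 = (-2, 3).
    assert!(approx_eq_simd_f32(y[0], Complex::new(4.0, 1.0)));
    assert!(approx_eq_simd_f32(y[1], Complex::new(-2.0, 3.0)));
}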
/// Parity and roundtrip tests for the AVX-512 codelet paths, checked against
/// the O(n²) reference DFTs above.
#[cfg(target_arch = "x86_64")]
mod avx512_tests {
use super::*;
#[test]
fn avx512_size2_f64_forward() {
let input: Vec<Complex<f64>> = (0..2)
.map(|i| Complex::new(f64::from(i as i32) * 1.5 + 0.3, f64::from(i as i32) * 0.7))
.collect();
let expected = naive_dft(&input, -1);
let mut actual = input;
super::super::codelet_simd_2(&mut actual, -1);
for (i, (a, e)) in actual.iter().zip(expected.iter()).enumerate() {
assert!(
approx_eq_simd_f64(*a, *e),
"avx512_size2_f64_forward index {i}: {a:?} != {e:?}"
);
}
}
#[test]
fn avx512_size2_f64_roundtrip() {
let original: Vec<Complex<f64>> = (0..2)
.map(|i| Complex::new(f64::from(i as i32).sin(), f64::from(i as i32).cos()))
.collect();
let mut data = original.clone();
super::super::codelet_simd_2(&mut data, -1);
super::super::codelet_simd_2(&mut data, 1);
        // The unscaled inverse leaves the data multiplied by n = 2; divide it out.
        for x in &mut data {
*x = Complex::new(x.re / 2.0, x.im / 2.0);
}
for (i, (a, e)) in data.iter().zip(original.iter()).enumerate() {
assert!(
approx_eq_simd_f64(*a, *e),
"avx512_size2_f64_roundtrip index {i}: {a:?} != {e:?}"
);
}
}
#[test]
fn avx512_size4_f64_forward() {
let input: Vec<Complex<f64>> = (0..4)
.map(|i| Complex::new(f64::from(i as i32) * 1.3, f64::from(i as i32) * 0.9))
.collect();
let expected = naive_dft(&input, -1);
let mut actual = input;
super::super::codelet_simd_4(&mut actual, -1);
for (i, (a, e)) in actual.iter().zip(expected.iter()).enumerate() {
assert!(
approx_eq_simd_f64(*a, *e),
"avx512_size4_f64_forward index {i}: {a:?} != {e:?}"
);
}
}
#[test]
fn avx512_size4_f64_roundtrip() {
let original: Vec<Complex<f64>> = (0..4)
.map(|i| Complex::new(f64::from(i as i32).sin(), f64::from(i as i32).cos()))
.collect();
let mut data = original.clone();
super::super::codelet_simd_4(&mut data, -1);
super::super::codelet_simd_4(&mut data, 1);
for x in &mut data {
*x = Complex::new(x.re / 4.0, x.im / 4.0);
}
for (i, (a, e)) in data.iter().zip(original.iter()).enumerate() {
assert!(
approx_eq_simd_f64(*a, *e),
"avx512_size4_f64_roundtrip index {i}: {a:?} != {e:?}"
);
}
}
#[test]
fn avx512_size8_f64_forward() {
let input: Vec<Complex<f64>> = (0..8)
.map(|i| {
Complex::new(
f64::from(i as i32).sin() * 2.0 + 0.5,
f64::from(i as i32).cos(),
)
})
.collect();
let expected = naive_dft(&input, -1);
let mut actual = input;
super::super::codelet_simd_8(&mut actual, -1);
for (i, (a, e)) in actual.iter().zip(expected.iter()).enumerate() {
assert!(
approx_eq_simd_f64(*a, *e),
"avx512_size8_f64_forward index {i}: {a:?} != {e:?}"
);
}
}
#[test]
fn avx512_size8_f64_roundtrip() {
let original: Vec<Complex<f64>> = (0..8)
.map(|i| Complex::new(f64::from(i as i32).sin(), f64::from(i as i32).cos()))
.collect();
let mut data = original.clone();
super::super::codelet_simd_8(&mut data, -1);
super::super::codelet_simd_8(&mut data, 1);
for x in &mut data {
*x = Complex::new(x.re / 8.0, x.im / 8.0);
}
for (i, (a, e)) in data.iter().zip(original.iter()).enumerate() {
assert!(
approx_eq_simd_f64(*a, *e),
"avx512_size8_f64_roundtrip index {i}: {a:?} != {e:?}"
);
}
}
#[test]
fn avx512_size4_f32_forward() {
let input: Vec<Complex<f32>> = (0..4)
.map(|i| Complex::new(f32::from(i as u8) * 1.3, f32::from(i as u8) * 0.9))
.collect();
let expected = naive_dft_f32(&input, -1);
let mut actual = input;
super::super::codelet_simd_4(&mut actual, -1);
for (i, (a, e)) in actual.iter().zip(expected.iter()).enumerate() {
assert!(
approx_eq_simd_f32(*a, *e),
"avx512_size4_f32_forward index {i}: {a:?} != {e:?}"
);
}
}
#[test]
fn avx512_size4_f32_roundtrip() {
let original: Vec<Complex<f32>> = (0..4)
.map(|i| Complex::new((i as f32).sin(), (i as f32).cos()))
.collect();
let mut data = original.clone();
super::super::codelet_simd_4(&mut data, -1);
super::super::codelet_simd_4(&mut data, 1);
for x in &mut data {
*x = Complex::new(x.re / 4.0, x.im / 4.0);
}
for (i, (a, e)) in data.iter().zip(original.iter()).enumerate() {
assert!(
approx_eq_simd_f32(*a, *e),
"avx512_size4_f32_roundtrip index {i}: {a:?} != {e:?}"
);
}
}
#[test]
fn avx512_size8_f32_forward() {
let input: Vec<Complex<f32>> = (0..8)
.map(|i| Complex::new((i as f32).sin() * 2.0 + 0.5, (i as f32).cos()))
.collect();
let expected = naive_dft_f32(&input, -1);
let mut actual = input;
super::super::codelet_simd_8(&mut actual, -1);
for (i, (a, e)) in actual.iter().zip(expected.iter()).enumerate() {
assert!(
approx_eq_simd_f32(*a, *e),
"avx512_size8_f32_forward index {i}: {a:?} != {e:?}"
);
}
}
#[test]
fn avx512_size8_f32_roundtrip() {
let original: Vec<Complex<f32>> = (0..8)
.map(|i| Complex::new((i as f32).sin(), (i as f32).cos()))
.collect();
let mut data = original.clone();
super::super::codelet_simd_8(&mut data, -1);
super::super::codelet_simd_8(&mut data, 1);
for x in &mut data {
*x = Complex::new(x.re / 8.0, x.im / 8.0);
}
for (i, (a, e)) in data.iter().zip(original.iter()).enumerate() {
assert!(
approx_eq_simd_f32(*a, *e),
"avx512_size8_f32_roundtrip index {i}: {a:?} != {e:?}"
);
}
}
#[test]
fn avx512_size16_f32_forward() {
let input: Vec<Complex<f32>> = (0..16)
.map(|i| Complex::new((i as f32) * 0.5, (i as f32) * 0.3 - 1.0))
.collect();
let expected = naive_dft_f32(&input, -1);
let mut actual = input;
super::super::codelet_simd_16(&mut actual, -1);
for (i, (a, e)) in actual.iter().zip(expected.iter()).enumerate() {
assert!(
approx_eq_simd_f32(*a, *e),
"avx512_size16_f32_forward index {i}: {a:?} != {e:?}"
);
}
}
#[test]
fn avx512_size16_f32_roundtrip() {
let original: Vec<Complex<f32>> = (0..16)
.map(|i| Complex::new((i as f32).sin(), (i as f32).cos()))
.collect();
let mut data = original.clone();
super::super::codelet_simd_16(&mut data, -1);
super::super::codelet_simd_16(&mut data, 1);
for x in &mut data {
*x = Complex::new(x.re / 16.0, x.im / 16.0);
}
for (i, (a, e)) in data.iter().zip(original.iter()).enumerate() {
assert!(
approx_eq_simd_f32(*a, *e),
"avx512_size16_f32_roundtrip index {i}: {a:?} != {e:?}"
);
}
}
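    // Analytic spot-check (a hedged sketch): the forward DFT of a constant
    // signal puts all energy in bin 0, so X[0] = n * c and X[k] = 0 for k > 0,
    // independent of how the codelet shuffles lanes internally.
    #[test]
    fn avx512_size16_f32_dc_input() {
        let c = Complex::new(0.25_f32, -0.75);
        let mut data = vec![c; 16];
        super::super::codelet_simd_16(&mut data, -1);
        // 16 * (0.25, -0.75) = (4.0, -12.0).
        assert!(approx_eq_simd_f32(data[0], Complex::new(4.0, -12.0)));
        for (i, x) in data.iter().enumerate().skip(1) {
            assert!(
                approx_eq_simd_f32(*x, Complex::new(0.0, 0.0)),
                "avx512_size16_f32_dc_input index {i}: {x:?} != 0"
            );
        }
    }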
}
/// Regression tests pinning the AVX2+FMA codelet paths against the reference
/// DFT on fixed inputs.
#[cfg(target_arch = "x86_64")]
mod avx2_fma_regression {
use super::{approx_eq_simd_f64, naive_dft};
use crate::kernel::Complex;
#[test]
fn avx2_fma_parity_size2_f64() {
let input = vec![Complex::new(1.5_f64, -0.3), Complex::new(-0.7, 2.1)];
let expected = naive_dft(&input, -1);
let mut actual = input;
super::super::codelet_simd_2(&mut actual, -1);
for (i, (a, e)) in actual.iter().zip(expected.iter()).enumerate() {
assert!(
approx_eq_simd_f64(*a, *e),
"avx2_fma_parity_size2_f64 index {i}: {a:?} != {e:?}"
);
}
}
#[test]
fn avx2_fma_parity_size4_f64() {
let input = vec![
Complex::new(0.731_f64, -0.429),
Complex::new(-1.213, 0.876),
Complex::new(0.051, 2.001),
Complex::new(-0.999, -0.555),
];
let expected = naive_dft(&input, -1);
let mut actual = input;
super::super::codelet_simd_4(&mut actual, -1);
for (i, (a, e)) in actual.iter().zip(expected.iter()).enumerate() {
assert!(
approx_eq_simd_f64(*a, *e),
"avx2_fma_parity_size4_f64 index {i}: {a:?} != {e:?}"
);
}
}
#[test]
fn avx2_fma_parity_size8_f64() {
let input: Vec<Complex<f64>> = (0..8)
.map(|i| Complex::new(f64::from(i as i32).sin(), f64::from(i as i32).cos()))
.collect();
let expected = naive_dft(&input, -1);
let mut actual = input;
super::super::codelet_simd_8(&mut actual, -1);
for (i, (a, e)) in actual.iter().zip(expected.iter()).enumerate() {
assert!(
approx_eq_simd_f64(*a, *e),
"avx2_fma_parity_size8_f64 index {i}: {a:?} != {e:?}"
);
}
}
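    // Inverse-direction parity (a hedged sketch mirroring the forward tests
    // above): the codelets take `sign = 1` for the inverse, so pin that
    // direction against the reference as well.
    #[test]
    fn avx2_fma_parity_size4_f64_inverse() {
        let input = vec![
            Complex::new(0.731_f64, -0.429),
            Complex::new(-1.213, 0.876),
            Complex::new(0.051, 2.001),
            Complex::new(-0.999, -0.555),
        ];
        let expected = naive_dft(&input, 1);
        let mut actual = input;
        super::super::codelet_simd_4(&mut actual, 1);
        for (i, (a, e)) in actual.iter().zip(expected.iter()).enumerate() {
            assert!(
                approx_eq_simd_f64(*a, *e),
                "avx2_fma_parity_size4_f64_inverse index {i}: {a:?} != {e:?}"
            );
        }
    }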
}
/// Parity tests for the pure-AVX codelet paths; each test returns early
/// (passing vacuously) when the host lacks AVX support.
#[cfg(target_arch = "x86_64")]
mod pure_avx_parity_tests {
use super::{approx_eq_simd_f64, naive_dft};
use crate::kernel::Complex;
    /// Tighter check for the pure-AVX paths: accept an absolute difference
    /// within `1e-12` per component, otherwise fall back to the shared
    /// `approx_eq_simd_f64` tolerance.
    fn avx_approx_eq(a: Complex<f64>, b: Complex<f64>) -> bool {
        let abs_diff_re = (a.re - b.re).abs();
        let abs_diff_im = (a.im - b.im).abs();
        let abs_floor = 1e-12_f64;
        (abs_diff_re <= abs_floor && abs_diff_im <= abs_floor) || approx_eq_simd_f64(a, b)
    }
#[test]
fn avx_size2_f64_forward() {
if !is_x86_feature_detected!("avx") {
return;
}
let input = vec![Complex::new(1.5_f64, -0.3), Complex::new(-0.7, 2.1)];
let expected = naive_dft(&input, -1);
let mut actual = input;
super::super::codelet_simd_2(&mut actual, -1);
for (i, (a, e)) in actual.iter().zip(expected.iter()).enumerate() {
assert!(
avx_approx_eq(*a, *e),
"avx_size2_f64_forward index {i}: {a:?} != {e:?}"
);
}
}
#[test]
fn avx_size2_f64_roundtrip() {
if !is_x86_feature_detected!("avx") {
return;
}
let original = vec![Complex::new(0.731_f64, -0.429), Complex::new(-1.213, 0.876)];
let mut data = original.clone();
super::super::codelet_simd_2(&mut data, -1);
super::super::codelet_simd_2(&mut data, 1);
for x in &mut data {
*x = Complex::new(x.re / 2.0, x.im / 2.0);
}
for (i, (a, e)) in data.iter().zip(original.iter()).enumerate() {
assert!(
avx_approx_eq(*a, *e),
"avx_size2_f64_roundtrip index {i}: {a:?} != {e:?}"
);
}
}
#[test]
fn avx_size4_f64_forward() {
if !is_x86_feature_detected!("avx") {
return;
}
let input: Vec<Complex<f64>> = (0..4)
.map(|i| Complex::new(f64::from(i as i32) * 1.3, f64::from(i as i32) * 0.9))
.collect();
let expected = naive_dft(&input, -1);
let mut actual = input;
super::super::codelet_simd_4(&mut actual, -1);
for (i, (a, e)) in actual.iter().zip(expected.iter()).enumerate() {
assert!(
avx_approx_eq(*a, *e),
"avx_size4_f64_forward index {i}: {a:?} != {e:?}"
);
}
}
#[test]
fn avx_size4_f64_roundtrip() {
if !is_x86_feature_detected!("avx") {
return;
}
let original: Vec<Complex<f64>> = (0..4)
.map(|i| Complex::new(f64::from(i as i32).sin(), f64::from(i as i32).cos()))
.collect();
let mut data = original.clone();
super::super::codelet_simd_4(&mut data, -1);
super::super::codelet_simd_4(&mut data, 1);
for x in &mut data {
*x = Complex::new(x.re / 4.0, x.im / 4.0);
}
for (i, (a, e)) in data.iter().zip(original.iter()).enumerate() {
assert!(
avx_approx_eq(*a, *e),
"avx_size4_f64_roundtrip index {i}: {a:?} != {e:?}"
);
}
}
#[test]
fn avx_size8_f64_forward() {
if !is_x86_feature_detected!("avx") {
return;
}
let input: Vec<Complex<f64>> = (0..8)
.map(|i| {
Complex::new(
f64::from(i as i32).sin() * 2.0 + 0.5,
f64::from(i as i32).cos(),
)
})
.collect();
let expected = naive_dft(&input, -1);
let mut actual = input;
super::super::codelet_simd_8(&mut actual, -1);
for (i, (a, e)) in actual.iter().zip(expected.iter()).enumerate() {
assert!(
avx_approx_eq(*a, *e),
"avx_size8_f64_forward index {i}: {a:?} != {e:?}"
);
}
}
#[test]
fn avx_size8_f64_roundtrip() {
if !is_x86_feature_detected!("avx") {
return;
}
let original: Vec<Complex<f64>> = (0..8)
.map(|i| Complex::new(f64::from(i as i32).sin(), f64::from(i as i32).cos()))
.collect();
let mut data = original.clone();
super::super::codelet_simd_8(&mut data, -1);
super::super::codelet_simd_8(&mut data, 1);
for x in &mut data {
*x = Complex::new(x.re / 8.0, x.im / 8.0);
}
for (i, (a, e)) in data.iter().zip(original.iter()).enumerate() {
assert!(
avx_approx_eq(*a, *e),
"avx_size8_f64_roundtrip index {i}: {a:?} != {e:?}"
);
}
}
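    // Analytic spot-check (a hedged sketch): the DFT of a unit impulse at
    // index 0 is one in every bin, so no tolerance on twiddle accuracy is
    // needed beyond the comparator's floor.
    #[test]
    fn avx_size4_f64_impulse() {
        if !is_x86_feature_detected!("avx") {
            return;
        }
        let mut data = vec![Complex::new(0.0_f64, 0.0); 4];
        data[0] = Complex::new(1.0, 0.0);
        super::super::codelet_simd_4(&mut data, -1);
        for (i, x) in data.iter().enumerate() {
            assert!(
                avx_approx_eq(*x, Complex::new(1.0, 0.0)),
                "avx_size4_f64_impulse index {i}: {x:?} != 1"
            );
        }
    }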
}
/// Parity tests guarding the AVX2 shuffle rewrite: fixed inputs run through
/// the codelets must keep matching the reference DFT in both directions.
#[cfg(target_arch = "x86_64")]
mod avx2_shuffle_rewrite_parity {
use super::{approx_eq_simd_f64, naive_dft};
use crate::kernel::Complex;
fn fixed_inputs_size2() -> Vec<Complex<f64>> {
vec![Complex::new(1.5_f64, -0.3), Complex::new(-0.7, 2.1)]
}
fn fixed_inputs_size4() -> Vec<Complex<f64>> {
vec![
Complex::new(0.731_f64, -0.429),
Complex::new(-1.213, 0.876),
Complex::new(0.051, 2.001),
Complex::new(-0.999, -0.555),
]
}
fn fixed_inputs_size8() -> Vec<Complex<f64>> {
(0..8)
.map(|i| Complex::new(f64::from(i as i32).sin(), f64::from(i as i32).cos()))
.collect()
}
#[test]
fn avx2_shuffle_parity_size2_f64() {
let input = fixed_inputs_size2();
let expected = naive_dft(&input, -1);
let mut actual = input;
super::super::codelet_simd_2(&mut actual, -1);
for (i, (a, e)) in actual.iter().zip(expected.iter()).enumerate() {
assert!(
approx_eq_simd_f64(*a, *e),
"avx2_shuffle_parity_size2 index {i}: {a:?} != {e:?}"
);
}
}
#[test]
fn avx2_shuffle_parity_size4_f64() {
let input = fixed_inputs_size4();
let expected = naive_dft(&input, -1);
let mut actual = input;
super::super::codelet_simd_4(&mut actual, -1);
for (i, (a, e)) in actual.iter().zip(expected.iter()).enumerate() {
assert!(
approx_eq_simd_f64(*a, *e),
"avx2_shuffle_parity_size4 index {i}: {a:?} != {e:?}"
);
}
}
#[test]
fn avx2_shuffle_parity_size8_f64() {
let input = fixed_inputs_size8();
let expected = naive_dft(&input, -1);
let mut actual = input;
super::super::codelet_simd_8(&mut actual, -1);
for (i, (a, e)) in actual.iter().zip(expected.iter()).enumerate() {
assert!(
approx_eq_simd_f64(*a, *e),
"avx2_shuffle_parity_size8 index {i}: {a:?} != {e:?}"
);
}
}
#[test]
fn avx2_shuffle_parity_size2_f64_inverse() {
let input = fixed_inputs_size2();
let expected = naive_dft(&input, 1);
let mut actual = input;
super::super::codelet_simd_2(&mut actual, 1);
for (i, (a, e)) in actual.iter().zip(expected.iter()).enumerate() {
assert!(
approx_eq_simd_f64(*a, *e),
"avx2_shuffle_parity_size2_inv index {i}: {a:?} != {e:?}"
);
}
}
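    // Linearity spot-check (a hedged sketch; assumes `Complex<f64>` implements
    // `Add`, as the reference DFT relies on for `Complex<f32>`): the DFT is
    // linear, so transforming an element-wise sum must equal the sum of the
    // individual transforms. A lane-scrambling shuffle rewrite would break this.
    #[test]
    fn avx2_shuffle_linearity_size4_f64() {
        let x = fixed_inputs_size4();
        // A second, distinct input derived from the first (multiplied by -i).
        let y: Vec<Complex<f64>> = x.iter().map(|c| Complex::new(c.im, -c.re)).collect();
        let mut sum: Vec<Complex<f64>> =
            x.iter().zip(y.iter()).map(|(a, b)| *a + *b).collect();
        let mut fx = x;
        let mut fy = y;
        super::super::codelet_simd_4(&mut fx, -1);
        super::super::codelet_simd_4(&mut fy, -1);
        super::super::codelet_simd_4(&mut sum, -1);
        for (i, ((s, a), b)) in sum.iter().zip(fx.iter()).zip(fy.iter()).enumerate() {
            let combined = *a + *b;
            assert!(
                approx_eq_simd_f64(*s, combined),
                "avx2_shuffle_linearity_size4 index {i}: {s:?} != {combined:?}"
            );
        }
    }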
}