use crate::{Complex32, Radix};
/// Generates a Stockham autosort driver bound to one family of butterfly kernels.
///
/// Arguments, in order:
/// 1. the name of the generated function;
/// 2. for each of the radices 2, 3, 4, 5, 7 and 8, a pair of kernel paths: the
///    kernel called while the running stride equals 1 (invoked without a stride
///    argument), followed by the kernel called for every other stride;
/// 3. `cfg = <predicate>`, the `cfg` predicate gating the generated function.
macro_rules! define_stockham_autosort {
    (
        $fn_name:ident,
        $radix2_stride1:path,
        $radix2_generic:path,
        $radix3_stride1:path,
        $radix3_generic:path,
        $radix4_stride1:path,
        $radix4_generic:path,
        $radix5_stride1:path,
        $radix5_generic:path,
        $radix7_stride1:path,
        $radix7_generic:path,
        $radix8_stride1:path,
        $radix8_generic:path,
        cfg = $cfg:meta
    ) => {
        /// Runs one butterfly stage per entry of `factors`, ping-ponging between
        /// `data` and `scratchpad`.
        ///
        /// The first stage is handed `data` as its source and `scratchpad` as its
        /// destination; the two roles are exchanged after every stage. The return
        /// value names the buffer that served as the destination of the last
        /// stage (`Data` when `factors` is empty).
        ///
        /// # Panics
        ///
        /// Panics if `twiddles` is shorter than the stages consume, and in debug
        /// builds if `data` and `scratchpad` differ in length.
        #[cfg($cfg)]
        #[must_use]
        pub(crate) fn $fn_name(
            data: &mut [Complex32],
            twiddles: &[Complex32],
            factors: &[Radix],
            scratchpad: &mut [Complex32],
        ) -> OutputLocation {
            let len = data.len();
            debug_assert_eq!(len, scratchpad.len());

            // No stages to run: nothing has been moved out of `data`.
            if factors.is_empty() {
                return OutputLocation::Data;
            }

            let (mut src, mut dst) = (data, scratchpad);
            let mut twiddles_consumed = 0;
            let mut stride = 1;

            for factor in factors {
                let radix = factor.radix();
                let unit_stride = stride == 1;

                // A stage running at stride 1 takes an empty twiddle slice; any
                // other stage takes `radix - 1` twiddles for each of the
                // `len / radix` columns.
                let per_column = radix - 1;
                let columns = len / radix;
                let full_count = columns * per_column;
                let stage_len = if unit_stride { 0 } else { full_count };
                let stage_end = twiddles_consumed + stage_len;
                let stage_twiddles = &twiddles[twiddles_consumed..stage_end];

                match (factor, unit_stride) {
                    (Radix::Factor2, true) => $radix2_stride1(src, dst, stage_twiddles),
                    (Radix::Factor2, false) => $radix2_generic(src, dst, stage_twiddles, stride),
                    (Radix::Factor3, true) => $radix3_stride1(src, dst, stage_twiddles),
                    (Radix::Factor3, false) => $radix3_generic(src, dst, stage_twiddles, stride),
                    (Radix::Factor4, true) => $radix4_stride1(src, dst, stage_twiddles),
                    (Radix::Factor4, false) => $radix4_generic(src, dst, stage_twiddles, stride),
                    (Radix::Factor5, true) => $radix5_stride1(src, dst, stage_twiddles),
                    (Radix::Factor5, false) => $radix5_generic(src, dst, stage_twiddles, stride),
                    (Radix::Factor7, true) => $radix7_stride1(src, dst, stage_twiddles),
                    (Radix::Factor7, false) => $radix7_generic(src, dst, stage_twiddles, stride),
                    (Radix::Factor8, true) => $radix8_stride1(src, dst, stage_twiddles),
                    (Radix::Factor8, false) => $radix8_generic(src, dst, stage_twiddles, stride),
                }

                // The destination of this stage becomes the source of the next.
                core::mem::swap(&mut src, &mut dst);
                twiddles_consumed = stage_end;
                stride *= radix;
            }

            // The buffers were exchanged once per stage, so the parity of the
            // stage count identifies the destination of the final stage.
            let even_stage_count = factors.len().is_multiple_of(2);
            if even_stage_count {
                OutputLocation::Data
            } else {
                OutputLocation::Scratchpad
            }
        }
    };
}
// Instantiates `stockham_autosort_avx_fma`, wiring every radix (2, 3, 4, 5, 7, 8)
// to its `*_stride1_avx_fma_dispatch` / `*_generic_avx_fma_dispatch` kernel pair.
// Compiled only for x86_64 targets when the `no_std` feature is disabled.
define_stockham_autosort!(
stockham_autosort_avx_fma,
crate::fft::butterflies::butterfly_radix2_stride1_avx_fma_dispatch,
crate::fft::butterflies::butterfly_radix2_generic_avx_fma_dispatch,
crate::fft::butterflies::butterfly_radix3_stride1_avx_fma_dispatch,
crate::fft::butterflies::butterfly_radix3_generic_avx_fma_dispatch,
crate::fft::butterflies::butterfly_radix4_stride1_avx_fma_dispatch,
crate::fft::butterflies::butterfly_radix4_generic_avx_fma_dispatch,
crate::fft::butterflies::butterfly_radix5_stride1_avx_fma_dispatch,
crate::fft::butterflies::butterfly_radix5_generic_avx_fma_dispatch,
crate::fft::butterflies::butterfly_radix7_stride1_avx_fma_dispatch,
crate::fft::butterflies::butterfly_radix7_generic_avx_fma_dispatch,
crate::fft::butterflies::butterfly_radix8_stride1_avx_fma_dispatch,
crate::fft::butterflies::butterfly_radix8_generic_avx_fma_dispatch,
cfg = all(target_arch = "x86_64", not(feature = "no_std"))
);
// Instantiates `stockham_autosort_sse2`, wiring every radix (2, 3, 4, 5, 7, 8)
// to its `*_stride1_sse2_dispatch` / `*_generic_sse2_dispatch` kernel pair.
// Compiled only for x86_64 targets when the `no_std` feature is disabled.
define_stockham_autosort!(
stockham_autosort_sse2,
crate::fft::butterflies::butterfly_radix2_stride1_sse2_dispatch,
crate::fft::butterflies::butterfly_radix2_generic_sse2_dispatch,
crate::fft::butterflies::butterfly_radix3_stride1_sse2_dispatch,
crate::fft::butterflies::butterfly_radix3_generic_sse2_dispatch,
crate::fft::butterflies::butterfly_radix4_stride1_sse2_dispatch,
crate::fft::butterflies::butterfly_radix4_generic_sse2_dispatch,
crate::fft::butterflies::butterfly_radix5_stride1_sse2_dispatch,
crate::fft::butterflies::butterfly_radix5_generic_sse2_dispatch,
crate::fft::butterflies::butterfly_radix7_stride1_sse2_dispatch,
crate::fft::butterflies::butterfly_radix7_generic_sse2_dispatch,
crate::fft::butterflies::butterfly_radix8_stride1_sse2_dispatch,
crate::fft::butterflies::butterfly_radix8_generic_sse2_dispatch,
cfg = all(target_arch = "x86_64", not(feature = "no_std"))
);
// Instantiates `stockham_autosort_sse4_2`, wiring every radix (2, 3, 4, 5, 7, 8)
// to its `*_stride1_sse4_2_dispatch` / `*_generic_sse4_2_dispatch` kernel pair.
// Compiled only for x86_64 targets when the `no_std` feature is disabled.
define_stockham_autosort!(
stockham_autosort_sse4_2,
crate::fft::butterflies::butterfly_radix2_stride1_sse4_2_dispatch,
crate::fft::butterflies::butterfly_radix2_generic_sse4_2_dispatch,
crate::fft::butterflies::butterfly_radix3_stride1_sse4_2_dispatch,
crate::fft::butterflies::butterfly_radix3_generic_sse4_2_dispatch,
crate::fft::butterflies::butterfly_radix4_stride1_sse4_2_dispatch,
crate::fft::butterflies::butterfly_radix4_generic_sse4_2_dispatch,
crate::fft::butterflies::butterfly_radix5_stride1_sse4_2_dispatch,
crate::fft::butterflies::butterfly_radix5_generic_sse4_2_dispatch,
crate::fft::butterflies::butterfly_radix7_stride1_sse4_2_dispatch,
crate::fft::butterflies::butterfly_radix7_generic_sse4_2_dispatch,
crate::fft::butterflies::butterfly_radix8_stride1_sse4_2_dispatch,
crate::fft::butterflies::butterfly_radix8_generic_sse4_2_dispatch,
cfg = all(target_arch = "x86_64", not(feature = "no_std"))
);
/// Names which of the two buffers handed to a Stockham autosort driver was the
/// destination of its final stage.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum OutputLocation {
    /// The `data` buffer; returned when the number of stages is even
    /// (including zero).
    Data,
    /// The `scratchpad` buffer; returned when the number of stages is odd.
    Scratchpad,
}
/// Portable Stockham autosort driver, compiled when the target is not x86_64 or
/// the `no_std` feature is enabled.
///
/// Runs one butterfly stage per entry of `factors`. The first stage is handed
/// `data` as its source and `scratchpad` as its destination; the two roles are
/// exchanged after every stage. The return value names the buffer that served as
/// the destination of the last stage (`Data` when `factors` is empty).
///
/// # Panics
///
/// Panics if `twiddles` is shorter than the stages consume, and in debug builds
/// if `data` and `scratchpad` differ in length.
#[cfg(any(not(target_arch = "x86_64"), feature = "no_std"))]
#[must_use]
pub(crate) fn stockham_autosort(
    data: &mut [Complex32],
    twiddles: &[Complex32],
    factors: &[Radix],
    scratchpad: &mut [Complex32],
) -> OutputLocation {
    use crate::fft::butterflies::{
        butterfly_radix2_dispatch,
        butterfly_radix3_dispatch,
        butterfly_radix4_dispatch,
        butterfly_radix5_dispatch,
        butterfly_radix7_dispatch,
        butterfly_radix8_dispatch,
    };

    let len = data.len();
    debug_assert_eq!(len, scratchpad.len());

    // No stages to run: nothing has been moved out of `data`.
    if factors.is_empty() {
        return OutputLocation::Data;
    }

    let (mut src, mut dst) = (data, scratchpad);
    let mut twiddles_consumed = 0;
    let mut stride = 1;

    for factor in factors {
        let radix = factor.radix();

        // A stage running at stride 1 takes an empty twiddle slice; any other
        // stage takes `radix - 1` twiddles for each of the `len / radix` columns.
        let per_column = radix - 1;
        let columns = len / radix;
        let full_count = columns * per_column;
        let stage_len = if stride == 1 { 0 } else { full_count };
        let stage_end = twiddles_consumed + stage_len;
        let stage_twiddles = &twiddles[twiddles_consumed..stage_end];

        match factor {
            Radix::Factor2 => butterfly_radix2_dispatch(src, dst, stage_twiddles, stride),
            Radix::Factor3 => butterfly_radix3_dispatch(src, dst, stage_twiddles, stride),
            Radix::Factor4 => butterfly_radix4_dispatch(src, dst, stage_twiddles, stride),
            Radix::Factor5 => butterfly_radix5_dispatch(src, dst, stage_twiddles, stride),
            Radix::Factor7 => butterfly_radix7_dispatch(src, dst, stage_twiddles, stride),
            Radix::Factor8 => butterfly_radix8_dispatch(src, dst, stage_twiddles, stride),
        }

        // The destination of this stage becomes the source of the next.
        core::mem::swap(&mut src, &mut dst);
        twiddles_consumed = stage_end;
        stride *= radix;
    }

    // The buffers were exchanged once per stage, so the parity of the stage
    // count identifies the destination of the final stage.
    let even_stage_count = factors.len().is_multiple_of(2);
    if even_stage_count {
        OutputLocation::Data
    } else {
        OutputLocation::Scratchpad
    }
}