use num_traits::Float;
/// Side length (in elements) of the square scratch tile used by `cobra_apply`.
const BLOCK_WIDTH: usize = 128;
/// log2 of `BLOCK_WIDTH`; must stay in sync with it (128 == 1 << 7).
const LOG_BLOCK_WIDTH: usize = 7;
#[inline]
pub(crate) fn bit_rev<T>(buf: &mut [T], log_n: usize) {
let mut nodd: usize;
let mut noddrev;
let big_n = 1 << log_n;
let halfn = big_n >> 1; let quartn = big_n >> 2;
let nmin1 = big_n - 1;
let mut forward = halfn; let mut rev = 1;
let mut i = quartn;
while i > 0 {
nodd = !i;
let mut zeros = 0;
while (nodd & 1) == 1 {
nodd >>= 1;
zeros += 1;
}
forward ^= 2 << zeros; rev ^= quartn >> zeros;
if forward < rev {
buf.swap(forward, rev);
nodd = nmin1 ^ forward; noddrev = nmin1 ^ rev;
buf.swap(nodd, noddrev); }
nodd = forward ^ 1; noddrev = rev ^ halfn;
buf.swap(nodd, noddrev);
i -= 1;
}
}
#[allow(unused)]
#[deprecated(
since = "0.1.0",
note = "Please use COBRA for a cache-optimal bit reverse permutation."
)]
fn complex_bit_rev<T: Float>(reals: &mut [T], imags: &mut [T], log_n: usize) {
let mut nodd: usize;
let mut noddrev;
let big_n = 1 << log_n;
let halfn = big_n >> 1; let quartn = big_n >> 2;
let nmin1 = big_n - 1;
let mut forward = halfn; let mut rev = 1;
let mut i = quartn;
while i > 0 {
nodd = !i;
let mut zeros = 0;
while (nodd & 1) == 1 {
nodd >>= 1;
zeros += 1;
}
forward ^= 2 << zeros; rev ^= quartn >> zeros;
if forward < rev {
reals.swap(forward, rev);
imags.swap(forward, rev);
nodd = nmin1 ^ forward; noddrev = nmin1 ^ rev;
reals.swap(nodd, noddrev);
imags.swap(nodd, noddrev);
}
nodd = forward ^ 1; noddrev = rev ^ halfn;
reals.swap(nodd, noddrev);
imags.swap(nodd, noddrev);
i -= 1;
}
}
#[allow(dead_code)]
#[deprecated(
    since = "0.1.0",
    note = "Naive bit reverse permutation is slow and not cache friendly. COBRA should be used instead."
)]
/// Naive in-place bit-reversal permutation of `buf` (length assumed to be a
/// power of two). Maintains `rev` as the bit-reversed counterpart of the
/// loop index, advancing it incrementally each iteration.
pub(crate) fn bit_reverse_permutation<T>(buf: &mut [T]) {
    let len = buf.len();
    let mut rev = 0;
    for idx in 1..len {
        // Bit-reversed increment: clear set bits from the top down, then
        // set the first clear one.
        let mut mask = len >> 1;
        while rev & mask != 0 {
            rev ^= mask;
            mask >>= 1;
        }
        // `mask` bit is clear in `rev` here, so OR-ing it in is the same as
        // the XOR used by the classic formulation.
        rev |= mask;
        // Swap each pair only once.
        if idx < rev {
            buf.swap(idx, rev);
        }
    }
}
/// COBRA (cache-optimal bit-reversal) permutation of `v`, which is assumed
/// to hold `1 << log_n` elements.
///
/// Each index is viewed as the bit fields `a | b | c`, where `a` and `c` are
/// the top and bottom `LOG_BLOCK_WIDTH` bits and `b` is the middle
/// `log_n - 2 * LOG_BLOCK_WIDTH` bits. For every `b`, a
/// `BLOCK_WIDTH x BLOCK_WIDTH` scratch tile is filled row-by-row from `v`,
/// swapped against the bit-reversed destination region, and finally written
/// back — keeping both reads and writes block-contiguous for cache locality.
#[multiversion::multiversion(targets("x86_64+avx512f+avx512bw+avx512cd+avx512dq+avx512vl", // x86_64-v4
"x86_64+avx2+fma", // x86_64-v3
"x86_64+sse4.2", // x86_64-v2
"x86+avx512f+avx512bw+avx512cd+avx512dq+avx512vl",
"x86+avx2+fma",
"x86+sse4.2",
"x86+sse2",
))]
pub fn cobra_apply<T: Default + Copy + Clone>(v: &mut [T], log_n: usize) {
    // Small inputs: fall back to the simple in-place routine. This also
    // guarantees num_b_bits >= 1 in the tiled path below.
    if log_n <= 2 * LOG_BLOCK_WIDTH {
        bit_rev(v, log_n);
        return;
    }
    // Width (in bits) and size of the middle `b` field.
    let num_b_bits = log_n - 2 * LOG_BLOCK_WIDTH;
    let b_size: usize = 1 << num_b_bits;
    let block_width: usize = 1 << LOG_BLOCK_WIDTH;
    // Square scratch tile, indexed as (row << LOG_BLOCK_WIDTH) | column.
    let mut buffer = [T::default(); BLOCK_WIDTH * BLOCK_WIDTH];
    for b in 0..b_size {
        // Reverse the low num_b_bits of b:
        // (b_size - 1).leading_zeros() == BITS - num_b_bits.
        let b_rev = b.reverse_bits() >> ((b_size - 1).leading_zeros());
        // Phase 1: copy source rows into the tile, placing row `a` at the
        // bit-reversed row index `a_rev` so the tile is pre-transposed.
        for a in 0..block_width {
            let a_rev = a.reverse_bits() >> ((block_width - 1).leading_zeros());
            for c in 0..BLOCK_WIDTH {
                buffer[(a_rev << LOG_BLOCK_WIDTH) | c] =
                    v[(a << num_b_bits << LOG_BLOCK_WIDTH) | (b << LOG_BLOCK_WIDTH) | c];
            }
        }
        // Phase 2: exchange tile entries with the bit-reversed destination
        // region of `v`.
        for c in 0..BLOCK_WIDTH {
            let c_rev = c.reverse_bits() >> ((block_width - 1).leading_zeros());
            for a_rev in 0..BLOCK_WIDTH {
                let a = a_rev.reverse_bits() >> ((block_width - 1).leading_zeros());
                // Lexicographic comparison of the index against its bit
                // reversal so each pair is exchanged exactly once.
                let index_less_than_reverse = a < c_rev
                    || (a == c_rev && b < b_rev)
                    || (a == c_rev && b == b_rev && a_rev < c);
                if index_less_than_reverse {
                    let v_idx = (c_rev << num_b_bits << LOG_BLOCK_WIDTH)
                        | (b_rev << LOG_BLOCK_WIDTH)
                        | a_rev;
                    let b_idx = (a_rev << LOG_BLOCK_WIDTH) | c;
                    std::mem::swap(&mut v[v_idx], &mut buffer[b_idx]);
                }
            }
        }
        // Phase 3: write the (now swapped-in) tile contents back to the
        // source region, using the same once-per-pair guard as phase 2.
        for a in 0..BLOCK_WIDTH {
            let a_rev = a.reverse_bits() >> ((block_width - 1).leading_zeros());
            for c in 0..BLOCK_WIDTH {
                let c_rev = c.reverse_bits() >> ((block_width - 1).leading_zeros());
                let index_less_than_reverse = a < c_rev
                    || (a == c_rev && b < b_rev)
                    || (a == c_rev && b == b_rev && a_rev < c);
                if index_less_than_reverse {
                    let v_idx = (a << num_b_bits << LOG_BLOCK_WIDTH) | (b << LOG_BLOCK_WIDTH) | c;
                    let b_idx = (a_rev << LOG_BLOCK_WIDTH) | c;
                    std::mem::swap(&mut v[v_idx], &mut buffer[b_idx]);
                }
            }
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Reference implementation: recursively deinterleave even- and
    /// odd-indexed elements; for power-of-two lengths this produces the
    /// bit-reversed ordering.
    fn top_down_bit_reverse_permutation<T: Copy + Clone>(x: &[T]) -> Vec<T> {
        if x.len() == 1 {
            return x.to_vec();
        }
        let mut evens = Vec::with_capacity(x.len() >> 1);
        let mut odds = Vec::with_capacity(x.len() >> 1);
        for pair in x.chunks_exact(2) {
            evens.push(pair[0]);
            odds.push(pair[1]);
        }
        let mut y = Vec::with_capacity(x.len());
        y.extend_from_slice(&top_down_bit_reverse_permutation(&evens));
        y.extend_from_slice(&top_down_bit_reverse_permutation(&odds));
        y
    }

    #[test]
    fn cobra() {
        // Covers both the small-input fallback and the tiled path.
        for log_n in 4..23 {
            let len = 1 << log_n;
            let mut actual: Vec<_> = (0..len).collect();
            cobra_apply(&mut actual, log_n);
            let input: Vec<_> = (0..len).collect();
            assert_eq!(actual, top_down_bit_reverse_permutation(&input));
        }
    }

    #[test]
    fn bit_reversal() {
        let mut buf: Vec<f64> = (0..8).map(f64::from).collect();
        bit_rev(&mut buf, 3);
        println!("{buf:?}");
        assert_eq!(buf, vec![0.0, 4.0, 2.0, 6.0, 1.0, 5.0, 3.0, 7.0]);

        let mut buf: Vec<f64> = (0..16).map(f64::from).collect();
        bit_rev(&mut buf, 4);
        println!("{buf:?}");
        assert_eq!(
            buf,
            vec![
                0.0, 8.0, 4.0, 12.0, 2.0, 10.0, 6.0, 14.0, 1.0, 9.0, 5.0, 13.0, 3.0, 11.0, 7.0,
                15.0,
            ]
        );
    }

    #[test]
    fn jennifer_method() {
        for log_n in 2..24 {
            let len = 1 << log_n;
            let expected: Vec<f64> = top_down_bit_reverse_permutation(
                &(0..len).map(f64::from).collect::<Vec<_>>(),
            );
            let mut re: Vec<f64> = (0..len).map(f64::from).collect();
            let mut im: Vec<f64> = (0..len).map(f64::from).collect();
            #[allow(deprecated)]
            complex_bit_rev(&mut re, &mut im, log_n);
            assert_eq!(re, expected);
            assert_eq!(im, expected);
        }
    }

    #[test]
    fn naive_bit_reverse_permutation() {
        for log_n in 2..24 {
            let len = 1 << log_n;
            let expected: Vec<f64> = top_down_bit_reverse_permutation(
                &(0..len).map(f64::from).collect::<Vec<_>>(),
            );
            let mut re: Vec<f64> = (0..len).map(f64::from).collect();
            let mut im: Vec<f64> = (0..len).map(f64::from).collect();
            #[allow(deprecated)]
            bit_reverse_permutation(&mut re);
            #[allow(deprecated)]
            bit_reverse_permutation(&mut im);
            assert_eq!(re, expected);
            assert_eq!(im, expected);
        }
    }
}