fastlanes/
transpose.rs

1use crate::{FastLanes, FL_ORDER};
2use seq_macro::seq;
3
/// Reordering of a 1024-element vector into and out of the layout described by
/// the free function `transpose` in this module.
///
/// A blanket implementation in this file covers every type that implements
/// `FastLanes`.
pub trait Transpose: FastLanes {
    /// Writes a permuted copy of `input` into `output`.
    ///
    /// In the blanket implementation, `output[i]` receives
    /// `input[transpose(i)]` for every `i` in `0..1024`, where `transpose` is
    /// the free index-mapping function of this module.
    fn transpose(input: &[Self; 1024], output: &mut [Self; 1024]);

    /// Writes a permuted copy of `input` into `output`, using the same index
    /// mapping in the opposite direction.
    ///
    /// In the blanket implementation, `output[transpose(i)]` receives
    /// `input[i]` for every `i` in `0..1024`. This undoes `Transpose::transpose`
    /// as long as the index mapping is a permutation of `0..1024`, which
    /// depends on `FL_ORDER` (defined elsewhere; presumably a permutation of
    /// `0..8` -- confirm).
    fn untranspose(input: &[Self; 1024], output: &mut [Self; 1024]);
}
8
9impl<T: FastLanes> Transpose for T {
10    #[inline(never)]
11    fn transpose(input: &[Self; 1024], output: &mut [Self; 1024]) {
12        seq!(i in 0..1024 {
13            output[i] = input[transpose(i)];
14        });
15    }
16
17    #[inline(never)]
18    fn untranspose(input: &[Self; 1024], output: &mut [Self; 1024]) {
19        seq!(i in 0..1024 {
20            output[transpose(i)] = input[i];
21        });
22    }
23}
24
25/// Return the corresponding index in a transposed `FastLanes` vector.
26#[allow(clippy::inline_always)]
27#[inline(always)]
28#[must_use]
29pub const fn transpose(idx: usize) -> usize {
30    // Row * 8, ORDER * 8, lane * 16.
31    let lane = idx % 16;
32    let order = (idx / 16) % 8;
33    let row = idx / 128;
34
35    (lane * 64) + (FL_ORDER[order] * 8) + row
36}