packed_simd 0.3.1

Portable Packed SIMD vectors
//! Horizontal swap bytes reductions.

// FIXME: investigate using `llvm.bswap`
// https://github.com/rust-lang-nursery/packed_simd/issues/19

use crate::*;

crate trait SwapBytes {
    fn swap_bytes(self) -> Self;
}

macro_rules! impl_swap_bytes {
    (v16: $($id:ident,)+) => {
        $(
            impl SwapBytes for $id {
                #[inline]
                fn swap_bytes(self) -> Self {
                    unsafe { shuffle!(self, [1, 0]) }
                }
            }
        )+
    };
    (v32: $($id:ident,)+) => {
        $(
            impl SwapBytes for $id {
                #[inline]
                #[cfg_attr(feature = "cargo-clippy", allow(clippy::useless_transmute))]
                fn swap_bytes(self) -> Self {
                    unsafe {
                        let bytes: u8x4 = crate::mem::transmute(self);
                        let result: u8x4 = shuffle!(bytes, [3, 2, 1, 0]);
                        crate::mem::transmute(result)
                    }
                }
            }
        )+
    };
    (v64: $($id:ident,)+) => {
        $(
            impl SwapBytes for $id {
                #[inline]
                #[cfg_attr(feature = "cargo-clippy", allow(clippy::useless_transmute))]
                fn swap_bytes(self) -> Self {
                    unsafe {
                        let bytes: u8x8 = crate::mem::transmute(self);
                        let result: u8x8 = shuffle!(bytes, [7, 6, 5, 4, 3, 2, 1, 0]);
                        crate::mem::transmute(result)
                    }
                }
            }
        )+
    };
    (v128: $($id:ident,)+) => {
        $(
            impl SwapBytes for $id {
                #[inline]
                #[cfg_attr(feature = "cargo-clippy", allow(clippy::useless_transmute))]
                fn swap_bytes(self) -> Self {
                    unsafe {
                        let bytes: u8x16 = crate::mem::transmute(self);
                        let result: u8x16 = shuffle!(bytes, [
                            15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
                        ]);
                        crate::mem::transmute(result)
                    }
                }
            }
        )+
    };
    (v256: $($id:ident,)+) => {
        $(
            impl SwapBytes for $id {
                #[inline]
                #[cfg_attr(feature = "cargo-clippy", allow(clippy::useless_transmute))]
                fn swap_bytes(self) -> Self {
                    unsafe {
                        let bytes: u8x32 = crate::mem::transmute(self);
                        let result: u8x32 = shuffle!(bytes, [
                            31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16,
                            15, 14, 13, 12, 11, 10, 9,  8,  7,  6,  5,  4,  3,  2,  1,  0
                        ]);
                        crate::mem::transmute(result)
                    }
                }
            }
        )+
    };
    (v512: $($id:ident,)+) => {
        $(
            impl SwapBytes for $id {
                #[inline]
                #[cfg_attr(feature = "cargo-clippy", allow(clippy::useless_transmute))]
                fn swap_bytes(self) -> Self {
                    unsafe {
                        let bytes: u8x64 = crate::mem::transmute(self);
                        let result: u8x64 = shuffle!(bytes, [
                            63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48,
                            47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32,
                            31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16,
                            15, 14, 13, 12, 11, 10, 9,  8,  7,  6,  5,  4,  3,  2,  1,  0
                        ]);
                        crate::mem::transmute(result)
                    }
                }
            }
        )+
    };
}

impl_swap_bytes!(v16: u8x2, i8x2,);
impl_swap_bytes!(v32: u8x4, i8x4, u16x2, i16x2,);
// FIXME: 64-bit single element vector
impl_swap_bytes!(
    v64: u8x8,
    i8x8,
    u16x4,
    i16x4,
    u32x2,
    i32x2, /* u64x1, i64x1, */
);

impl_swap_bytes!(
    v128: u8x16,
    i8x16,
    u16x8,
    i16x8,
    u32x4,
    i32x4,
    u64x2,
    i64x2,
    u128x1,
    i128x1,
);
impl_swap_bytes!(
    v256: u8x32,
    i8x32,
    u16x16,
    i16x16,
    u32x8,
    i32x8,
    u64x4,
    i64x4,
    u128x2,
    i128x2,
);

impl_swap_bytes!(
    v512: u8x64,
    i8x64,
    u16x32,
    i16x32,
    u32x16,
    i32x16,
    u64x8,
    i64x8,
    u128x4,
    i128x4,
);

cfg_if! {
    if #[cfg(target_pointer_width = "8")] {
        impl_swap_bytes!(v16: isizex2, usizex2,);
        impl_swap_bytes!(v32: isizex4, usizex4,);
        impl_swap_bytes!(v64: isizex8, usizex8,);
    } else if #[cfg(target_pointer_width = "16")] {
        impl_swap_bytes!(v32: isizex2, usizex2,);
        impl_swap_bytes!(v64: isizex4, usizex4,);
        impl_swap_bytes!(v128: isizex8, usizex8,);
    } else if #[cfg(target_pointer_width = "32")] {
        impl_swap_bytes!(v64: isizex2, usizex2,);
        impl_swap_bytes!(v128: isizex4, usizex4,);
        impl_swap_bytes!(v256: isizex8, usizex8,);
    } else if #[cfg(target_pointer_width = "64")] {
        impl_swap_bytes!(v128: isizex2, usizex2,);
        impl_swap_bytes!(v256: isizex4, usizex4,);
        impl_swap_bytes!(v512: isizex8, usizex8,);
    } else {
        compile_error!("unsupported target_pointer_width");
    }
}