// packed_simd 0.3.3
//
// Portable Packed SIMD vectors
// Documentation
//! Vertical floating-point `sin_cos_pi`
#![allow(unused)]

// FIXME 64-bit 1 elem vectors sin_cos

use crate::*;

/// Crate-internal operation producing a sine/cosine pair for a value that is
/// first scaled by π.
///
/// Every implementation in this module sets `Output = (Self, Self)` and
/// returns the sine in the first position and the cosine in the second.
///
/// Visibility is spelled `pub(crate)`: the bare `crate` modifier was an
/// unstable syntax that is no longer accepted by the compiler, and
/// `pub(crate)` grants exactly the same crate-wide visibility.
pub(crate) trait SinCosPi: Sized {
    /// Result type of [`SinCosPi::sin_cos_pi`].
    type Output;

    /// Consumes `self` and returns the implementation-defined sine/cosine
    /// result.
    fn sin_cos_pi(self) -> Self::Output;
}

/// Generates the portable `SinCosPi` implementation for the type `$vec`.
///
/// The input is multiplied by `$pi` broadcast through `Self::splat`, and the
/// pair `(sin, cos)` of that scaled value is returned. `$vec` therefore has
/// to provide `splat`, `sin`, `cos` and multiplication by itself.
macro_rules! impl_def {
    ($vec:ident, $pi:path) => {
        impl SinCosPi for $vec {
            type Output = (Self, Self);

            #[inline]
            fn sin_cos_pi(self) -> (Self, Self) {
                // Scale once so that both results are derived from the very
                // same argument.
                let scaled = self * Self::splat($pi);
                let sine = scaled.sin();
                let cosine = scaled.cos();
                (sine, cosine)
            }
        }
    };
}

/// Shorthand for `impl_def!` that supplies `crate::f32::consts::PI` as the
/// scaling constant for the vector type `$vec`.
macro_rules! impl_def32 {
    ($vec:ident) => {
        impl_def! { $vec, crate::f32::consts::PI }
    };
}
/// Shorthand for `impl_def!` that supplies `crate::f64::consts::PI` as the
/// scaling constant for the vector type `$vec`.
macro_rules! impl_def64 {
    ($vec:ident) => {
        impl_def! { $vec, crate::f64::consts::PI }
    };
}

/// Implements `SinCosPi` for `$vid` by forwarding to the vector function
/// `$fun`. In every arm the first element of the returned pair comes from
/// the first vector returned by `$fun` and the second element from the
/// second one.
///
/// The arms differ in how `$vid` is mapped onto the vector type that `$fun`
/// works with:
///
/// * `$vid: $fun` — `$fun` is called once on the whole vector.
/// * `$vid[t => $vid_t]: $fun` — `$vid` is half as wide as `$vid_t`; the
///   input is placed in the lower half of a `$vid_t`, `$fun` is called once,
///   and the lower half of each returned vector is kept.
/// * `$vid[h => $vid_h]: $fun` — `$vid` is split into two `$vid_h` halves,
///   `$fun` is called on each, and the partial results are re-assembled.
/// * `$vid[q => $vid_q]: $fun` — same as `h`, with four `$vid_q` quarters.
///
/// NOTE(review): every arm reinterprets values with `transmute` and unions.
/// This assumes that `$fun` takes and returns types with the same size and
/// layout as the crate's vector types / a pair of them, and that a tuple of
/// two identical vector types is laid out as two consecutive vectors —
/// confirm against the declarations of the functions passed as `$fun`.
macro_rules! impl_unary_t {
    ($vid:ident: $fun:ident) => {
        impl SinCosPi for $vid {
            type Output = (Self, Self);
            #[inline]
            fn sin_cos_pi(self) -> Self::Output {
                // SAFETY: relies on the layout assumptions stated on the
                // macro; `transmute` itself rejects size mismatches at
                // compile time.
                unsafe {
                    use crate::mem::transmute;
                    transmute($fun(transmute(self)))
                }
            }
        }
    };
    ($vid:ident[t => $vid_t:ident]: $fun:ident) => {
        impl SinCosPi for $vid {
            type Output = (Self, Self);
            #[inline]
            fn sin_cos_pi(self) -> Self::Output {
                // SAFETY: relies on the layout assumptions stated on the
                // macro. Every byte of both unions is initialized before it
                // is read.
                unsafe {
                    use crate::mem::transmute;

                    union U {
                        vec: [$vid; 2],
                        twice: $vid_t,
                    }

                    // The upper half is padding whose results are thrown
                    // away. It is filled with a copy of the input rather
                    // than with uninitialized memory, so that no
                    // uninitialized value is ever read or handed to `$fun`.
                    let twice = U { vec: [self, self] }.twice;
                    let twice = transmute($fun(transmute(twice)));

                    union R {
                        twice: ($vid_t, $vid_t),
                        vecs: ([$vid; 2], [$vid; 2]),
                    }
                    let r = R { twice }.vecs;
                    // `r.0` is the first vector returned by `$fun` and `r.1`
                    // the second; the lanes computed from `self` are the
                    // lower half (index 0) of each of them.
                    ((r.0)[0], (r.1)[0])
                }
            }
        }
    };
    ($vid:ident[h => $vid_h:ident]: $fun:ident) => {
        impl SinCosPi for $vid {
            type Output = (Self, Self);
            #[inline]
            fn sin_cos_pi(self) -> Self::Output {
                // SAFETY: relies on the layout assumptions stated on the
                // macro; both unions are fully initialized before being
                // read through their other field.
                unsafe {
                    use crate::mem::transmute;

                    union U {
                        vec: $vid,
                        halves: [$vid_h; 2],
                    }

                    let halves = U { vec: self }.halves;

                    let res_0: ($vid_h, $vid_h) =
                        transmute($fun(transmute(halves[0])));
                    let res_1: ($vid_h, $vid_h) =
                        transmute($fun(transmute(halves[1])));

                    union R {
                        result: ($vid, $vid),
                        halves: ([$vid_h; 2], [$vid_h; 2]),
                    }
                    // Gather the first results of both halves into the first
                    // output vector and the second results into the second.
                    R { halves: ([res_0.0, res_1.0], [res_0.1, res_1.1]) }
                        .result
                }
            }
        }
    };
    ($vid:ident[q => $vid_q:ident]: $fun:ident) => {
        impl SinCosPi for $vid {
            type Output = (Self, Self);
            #[inline]
            fn sin_cos_pi(self) -> Self::Output {
                // SAFETY: relies on the layout assumptions stated on the
                // macro; both unions are fully initialized before being
                // read through their other field.
                unsafe {
                    use crate::mem::transmute;

                    union U {
                        vec: $vid,
                        quarters: [$vid_q; 4],
                    }

                    let quarters = U { vec: self }.quarters;

                    let res_0: ($vid_q, $vid_q) =
                        transmute($fun(transmute(quarters[0])));
                    let res_1: ($vid_q, $vid_q) =
                        transmute($fun(transmute(quarters[1])));
                    let res_2: ($vid_q, $vid_q) =
                        transmute($fun(transmute(quarters[2])));
                    let res_3: ($vid_q, $vid_q) =
                        transmute($fun(transmute(quarters[3])));

                    union R {
                        result: ($vid, $vid),
                        quarters: ([$vid_q; 4], [$vid_q; 4]),
                    }
                    // Gather the first results of all quarters into the
                    // first output vector and the second results into the
                    // second.
                    R {
                        quarters: (
                            [res_0.0, res_1.0, res_2.0, res_3.0],
                            [res_0.1, res_1.1, res_2.1, res_3.1],
                        ),
                    }
                    .result
                }
            }
        }
    };
}

cfg_if! {
    if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
        use sleef_sys::*;
        cfg_if! {
            if #[cfg(target_feature = "avx2")] {
                impl_unary_t!(f32x2[t => f32x4]: Sleef_sincospif4_u05avx2128);
                impl_unary_t!(f32x16[h => f32x8]: Sleef_sincospif8_u05avx2);
                impl_unary_t!(f64x8[h => f64x4]: Sleef_sincospid4_u05avx2);

                impl_unary_t!(f32x4: Sleef_sincospif4_u05avx2128);
                impl_unary_t!(f32x8: Sleef_sincospif8_u05avx2);
                impl_unary_t!(f64x2: Sleef_sincospid2_u05avx2128);
                impl_unary_t!(f64x4: Sleef_sincospid4_u05avx2);
            } else if #[cfg(target_feature = "avx")] {
                impl_unary_t!(f32x2[t => f32x4]: Sleef_sincospif4_u05sse4);
                impl_unary_t!(f32x16[h => f32x8]: Sleef_sincospif8_u05avx);
                impl_unary_t!(f64x8[h => f64x4]: Sleef_sincospid4_u05avx);

                impl_unary_t!(f32x4: Sleef_sincospif4_u05sse4);
                impl_unary_t!(f32x8: Sleef_sincospif8_u05avx);
                impl_unary_t!(f64x2: Sleef_sincospid2_u05sse4);
                impl_unary_t!(f64x4: Sleef_sincospid4_u05avx);
            } else if #[cfg(target_feature = "sse4.2")] {
                impl_unary_t!(f32x2[t => f32x4]: Sleef_sincospif4_u05sse4);
                impl_unary_t!(f32x16[q => f32x4]: Sleef_sincospif4_u05sse4);
                impl_unary_t!(f64x8[q => f64x2]: Sleef_sincospid2_u05sse4);

                impl_unary_t!(f32x4: Sleef_sincospif4_u05sse4);
                impl_unary_t!(f32x8[h => f32x4]: Sleef_sincospif4_u05sse4);
                impl_unary_t!(f64x2: Sleef_sincospid2_u05sse4);
                impl_unary_t!(f64x4[h => f64x2]: Sleef_sincospid2_u05sse4);
            } else {
                impl_def32!(f32x2);
                impl_def32!(f32x4);
                impl_def32!(f32x8);
                impl_def32!(f32x16);

                impl_def64!(f64x2);
                impl_def64!(f64x4);
                impl_def64!(f64x8);
            }
        }
    } else {
        impl_def32!(f32x2);
        impl_def32!(f32x4);
        impl_def32!(f32x8);
        impl_def32!(f32x16);

        impl_def64!(f64x2);
        impl_def64!(f64x4);
        impl_def64!(f64x8);
    }
}