packed_simd 0.3.9

//! Implements portable horizontal integer vector arithmetic reductions.

macro_rules! impl_reduction_integer_arithmetic {
    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $ielem_ty:ident
     | $test_tt:tt) => {
        impl $id {
            /// Horizontal wrapping sum of the vector elements.
            ///
            /// The intrinsic performs a tree-reduction of the vector elements.
            /// That is, for an 8 element vector:
            ///
            /// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7))
            ///
            /// If an operation overflows it returns the mathematical result
            /// modulo `2^n` where `n` is the number of times it overflows.
            #[inline]
            pub fn wrapping_sum(self) -> $elem_ty {
                #[cfg(not(target_arch = "aarch64"))]
                {
                    use crate::llvm::simd_reduce_add_ordered;
                    let v: $ielem_ty = unsafe { simd_reduce_add_ordered(self.0, 0 as $ielem_ty) };
                    v as $elem_ty
                }
                #[cfg(target_arch = "aarch64")]
                {
                    // FIXME: broken on AArch64
                    // https://github.com/rust-lang-nursery/packed_simd/issues/15
                    let mut x = self.extract(0) as $elem_ty;
                    for i in 1..$id::lanes() {
                        x = x.wrapping_add(self.extract(i) as $elem_ty);
                    }
                    x
                }
            }

            /// Horizontal wrapping product of the vector elements.
            ///
            /// The intrinsic performs a tree-reduction of the vector elements.
            /// That is, for an 8 element vector:
            ///
            /// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7))
            ///
            /// If an operation overflows it returns the mathematical result
            /// modulo `2^n` where `n` is the number of times it overflows.
            #[inline]
            pub fn wrapping_product(self) -> $elem_ty {
                #[cfg(not(target_arch = "aarch64"))]
                {
                    use crate::llvm::simd_reduce_mul_ordered;
                    let v: $ielem_ty = unsafe { simd_reduce_mul_ordered(self.0, 1 as $ielem_ty) };
                    v as $elem_ty
                }
                #[cfg(target_arch = "aarch64")]
                {
                    // FIXME: broken on AArch64
                    // https://github.com/rust-lang-nursery/packed_simd/issues/15
                    let mut x = self.extract(0) as $elem_ty;
                    for i in 1..$id::lanes() {
                        x = x.wrapping_mul(self.extract(i) as $elem_ty);
                    }
                    x
                }
            }
        }

        impl crate::iter::Sum for $id {
            #[inline]
            fn sum<I: Iterator<Item = $id>>(iter: I) -> $id {
                iter.fold($id::splat(0), crate::ops::Add::add)
            }
        }

        impl crate::iter::Product for $id {
            #[inline]
            fn product<I: Iterator<Item = $id>>(iter: I) -> $id {
                iter.fold($id::splat(1), crate::ops::Mul::mul)
            }
        }

        impl<'a> crate::iter::Sum<&'a $id> for $id {
            #[inline]
            fn sum<I: Iterator<Item = &'a $id>>(iter: I) -> $id {
                iter.fold($id::splat(0), |a, b| crate::ops::Add::add(a, *b))
            }
        }

        impl<'a> crate::iter::Product<&'a $id> for $id {
            #[inline]
            fn product<I: Iterator<Item = &'a $id>>(iter: I) -> $id {
                iter.fold($id::splat(1), |a, b| crate::ops::Mul::mul(a, *b))
            }
        }

        test_if! {
            $test_tt:
            paste::item! {
                pub mod [<$id _reduction_int_arith>] {
                    use super::*;

                    fn alternating(x: usize) -> $id {
                        let mut v = $id::splat(1 as $elem_ty);
                        for i in 0..$id::lanes() {
                            if i % x == 0 {
                                v = v.replace(i, 2 as $elem_ty);
                            }
                        }
                        v
                    }

                    #[cfg_attr(not(target_arch = "wasm32"), test)]
                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
                    fn wrapping_sum() {
                        let v = $id::splat(0 as $elem_ty);
                        assert_eq!(v.wrapping_sum(), 0 as $elem_ty);
                        let v = $id::splat(1 as $elem_ty);
                        assert_eq!(v.wrapping_sum(), $id::lanes() as $elem_ty);
                        let v = alternating(2);
                        if $id::lanes() > 1 {
                            assert_eq!(
                                v.wrapping_sum(),
                                ($id::lanes() / 2 + $id::lanes()) as $elem_ty
                            );
                        } else {
                            assert_eq!(
                                v.wrapping_sum(),
                                2 as $elem_ty
                            );
                        }
                    }
                    #[cfg_attr(not(target_arch = "wasm32"), test)]
                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
                    fn wrapping_sum_overflow() {
                        let start = $elem_ty::max_value()
                            - ($id::lanes() as $elem_ty / 2);

                        let v = $id::splat(start as $elem_ty);
                        let vwrapping_sum = v.wrapping_sum();

                        let mut wrapping_sum = start;
                        for _ in 1..$id::lanes() {
                            wrapping_sum = wrapping_sum.wrapping_add(start);
                        }
                        assert_eq!(wrapping_sum, vwrapping_sum, "v = {:?}", v);
                    }

                    #[cfg_attr(not(target_arch = "wasm32"), test)]
                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
                    fn wrapping_product() {
                        let v = $id::splat(0 as $elem_ty);
                        assert_eq!(v.wrapping_product(), 0 as $elem_ty);
                        let v = $id::splat(1 as $elem_ty);
                        assert_eq!(v.wrapping_product(), 1 as $elem_ty);
                        let f = match $id::lanes() {
                            64 => 16,
                            32 => 8,
                            16 => 4,
                            _ => 2,
                        };
                        let v = alternating(f);
                        if $id::lanes() > 1 {
                            assert_eq!(
                                v.wrapping_product(),
                                (2_usize.pow(($id::lanes() / f) as u32)
                                 as $elem_ty)
                            );
                        } else {
                            assert_eq!(
                                v.wrapping_product(),
                                2 as $elem_ty
                            );
                        }
                    }

                    #[cfg_attr(not(target_arch = "wasm32"), test)]
                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
                    fn wrapping_product_overflow() {
                        let start = $elem_ty::max_value()
                            - ($id::lanes() as $elem_ty / 2);

                        let v = $id::splat(start as $elem_ty);
                        let vmul = v.wrapping_product();

                        let mut mul = start;
                        for _ in 1..$id::lanes() {
                            mul = mul.wrapping_mul(start);
                        }
                        assert_eq!(mul, vmul, "v = {:?}", v);
                    }
                }
            }
        }
    };
}