simdeez 3.0.1

SIMD library to abstract over different instruction sets and widths
Documentation
#![allow(deprecated)]

use super::*;
use crate::Simd;

/// Zero-sized marker type whose `Simd` implementation is built on the
/// `_mm512_*` (AVX-512) intrinsics.
pub struct Avx512;
impl Simd for Avx512 {
    type Vi8 = I8x64;
    type Vi16 = I16x32;
    type Vi32 = I32x16;
    type Vf32 = F32x16;
    type Vf64 = F64x8;
    type Vi64 = I64x8;

    #[inline]
    fn invoke<R>(f: impl FnOnce() -> R) -> R {
        #[inline]
        #[target_feature(enable = "avx512f", enable = "avx512bw", enable = "avx512dq")]
        unsafe fn inner<R>(f: impl FnOnce() -> R) -> R {
            f()
        }

        unsafe { inner(f) }
    }

    #[inline(always)]
    unsafe fn castps_pd(a: Self::Vf32) -> Self::Vf64 {
        F64x8(_mm512_castps_pd(a.0))
    }

    #[inline(always)]
    unsafe fn castpd_ps(a: Self::Vf64) -> Self::Vf32 {
        F32x16(_mm512_castpd_ps(a.0))
    }

    #[inline(always)]
    unsafe fn i32gather_epi32(arr: &[i32], index: Self::Vi32) -> Self::Vi32 {
        I32x16(_mm512_i32gather_epi32::<4>(index.0, arr.as_ptr()))
    }

    #[inline(always)]
    unsafe fn i64gather_epi64(arr: &[i64], index: Self::Vi64) -> Self::Vi64 {
        I64x8(_mm512_i64gather_epi64::<8>(index.0, arr.as_ptr()))
    }

    #[inline(always)]
    unsafe fn i32gather_ps(arr: &[f32], index: Self::Vi32) -> Self::Vf32 {
        F32x16(_mm512_i32gather_ps::<4>(index.0, arr.as_ptr()))
    }

    #[inline(always)]
    unsafe fn maskload_epi32(mem_addr: &i32, mask: Self::Vi32) -> Self::Vi32 {
        let kmask = _mm512_cmpneq_epi32_mask(mask.0, _mm512_setzero_si512());
        I32x16(_mm512_maskz_loadu_epi32(kmask, mem_addr as *const i32))
    }

    #[inline(always)]
    unsafe fn maskload_epi64(mem_addr: &i64, mask: Self::Vi64) -> Self::Vi64 {
        let kmask = _mm512_cmpneq_epi64_mask(mask.0, _mm512_setzero_si512());
        I64x8(_mm512_maskz_loadu_epi64(kmask, mem_addr as *const i64))
    }

    #[inline(always)]
    unsafe fn maskload_ps(mem_addr: &f32, mask: Self::Vi32) -> Self::Vf32 {
        let kmask = _mm512_cmpneq_epi32_mask(mask.0, _mm512_setzero_si512());
        F32x16(_mm512_maskz_loadu_ps(kmask, mem_addr as *const f32))
    }

    #[inline(always)]
    unsafe fn maskload_pd(mem_addr: &f64, mask: Self::Vi64) -> Self::Vf64 {
        let kmask = _mm512_cmpneq_epi64_mask(mask.0, _mm512_setzero_si512());
        F64x8(_mm512_maskz_loadu_pd(kmask, mem_addr as *const f64))
    }

    #[inline(always)]
    unsafe fn shuffle_epi32<const IMM8: i32>(a: Self::Vi32) -> Self::Vi32 {
        I32x16(_mm512_shuffle_epi32::<IMM8>(a.0))
    }
}