Trait simdeez::Simd

pub trait Simd {
    type Vi16: SimdSmallInt<Self::Vi16, i16>;
    type Vi32: SimdSmallInt<Self::Vi32, i32>;
    type Vi64: SimdBase<Self::Vi64, i64> + Not<Output = Self::Vi64>;
    type Vf32: SimdFloat<Self::Vf32, f32>;
    type Vf64: SimdFloat<Self::Vf64, f64>;

    const VF32_WIDTH: usize;
    const VF64_WIDTH: usize;
    const VI16_WIDTH: usize;
    const VI32_WIDTH: usize;
    const VI64_WIDTH: usize;

    unsafe fn div_ps(a: Self::Vf32, b: Self::Vf32) -> Self::Vf32;
    unsafe fn div_pd(a: Self::Vf64, b: Self::Vf64) -> Self::Vf64;
    unsafe fn abs_ps(a: Self::Vf32) -> Self::Vf32;
    unsafe fn abs_pd(a: Self::Vf64) -> Self::Vf64;
    unsafe fn add_epi16(a: Self::Vi16, b: Self::Vi16) -> Self::Vi16;
    unsafe fn sub_epi16(a: Self::Vi16, b: Self::Vi16) -> Self::Vi16;
    unsafe fn mullo_epi16(a: Self::Vi16, b: Self::Vi16) -> Self::Vi16;
    unsafe fn add_epi32(a: Self::Vi32, b: Self::Vi32) -> Self::Vi32;
    unsafe fn add_ps(a: Self::Vf32, b: Self::Vf32) -> Self::Vf32;
    unsafe fn add_pd(a: Self::Vf64, b: Self::Vf64) -> Self::Vf64;
    unsafe fn and_epi32(a: Self::Vi32, b: Self::Vi32) -> Self::Vi32;
    unsafe fn and_epi64(a: Self::Vi64, b: Self::Vi64) -> Self::Vi64;
    unsafe fn andnot_ps(a: Self::Vf32, b: Self::Vf32) -> Self::Vf32;
    unsafe fn andnot_pd(a: Self::Vf64, b: Self::Vf64) -> Self::Vf64;
    unsafe fn andnot_epi32(a: Self::Vi32, b: Self::Vi32) -> Self::Vi32;
    unsafe fn andnot_epi64(a: Self::Vi64, b: Self::Vi64) -> Self::Vi64;
    unsafe fn blendv_epi32(
        a: Self::Vi32,
        b: Self::Vi32,
        mask: Self::Vi32
    ) -> Self::Vi32;
    unsafe fn blendv_epi64(
        a: Self::Vi64,
        b: Self::Vi64,
        mask: Self::Vi64
    ) -> Self::Vi64;
    unsafe fn blendv_ps(
        a: Self::Vf32,
        b: Self::Vf32,
        mask: Self::Vf32
    ) -> Self::Vf32;
    unsafe fn blendv_pd(
        a: Self::Vf64,
        b: Self::Vf64,
        mask: Self::Vf64
    ) -> Self::Vf64;
    unsafe fn castps_epi32(a: Self::Vf32) -> Self::Vi32;
    unsafe fn castpd_epi64(a: Self::Vf64) -> Self::Vi64;
    unsafe fn castepi32_ps(a: Self::Vi32) -> Self::Vf32;
    unsafe fn castepi64_pd(a: Self::Vi64) -> Self::Vf64;
    unsafe fn castps_pd(a: Self::Vf32) -> Self::Vf64;
    unsafe fn castpd_ps(a: Self::Vf64) -> Self::Vf32;
    unsafe fn ceil_ps(a: Self::Vf32) -> Self::Vf32;
    unsafe fn ceil_pd(a: Self::Vf64) -> Self::Vf64;
    unsafe fn cmpeq_epi64(a: Self::Vi64, b: Self::Vi64) -> Self::Vi64;
    unsafe fn cmpneq_epi64(a: Self::Vi64, b: Self::Vi64) -> Self::Vi64;
    unsafe fn cmpge_epi64(a: Self::Vi64, b: Self::Vi64) -> Self::Vi64;
    unsafe fn cmpgt_epi64(a: Self::Vi64, b: Self::Vi64) -> Self::Vi64;
    unsafe fn cmple_epi64(a: Self::Vi64, b: Self::Vi64) -> Self::Vi64;
    unsafe fn cmplt_epi64(a: Self::Vi64, b: Self::Vi64) -> Self::Vi64;
    unsafe fn cmpeq_epi32(a: Self::Vi32, b: Self::Vi32) -> Self::Vi32;
    unsafe fn cmpneq_epi32(a: Self::Vi32, b: Self::Vi32) -> Self::Vi32;
    unsafe fn cmpge_epi32(a: Self::Vi32, b: Self::Vi32) -> Self::Vi32;
    unsafe fn cmpgt_epi32(a: Self::Vi32, b: Self::Vi32) -> Self::Vi32;
    unsafe fn cmple_epi32(a: Self::Vi32, b: Self::Vi32) -> Self::Vi32;
    unsafe fn cmplt_epi32(a: Self::Vi32, b: Self::Vi32) -> Self::Vi32;
    unsafe fn cmpeq_ps(a: Self::Vf32, b: Self::Vf32) -> Self::Vf32;
    unsafe fn cmpneq_ps(a: Self::Vf32, b: Self::Vf32) -> Self::Vf32;
    unsafe fn cmpge_ps(a: Self::Vf32, b: Self::Vf32) -> Self::Vf32;
    unsafe fn cmpgt_ps(a: Self::Vf32, b: Self::Vf32) -> Self::Vf32;
    unsafe fn cmple_ps(a: Self::Vf32, b: Self::Vf32) -> Self::Vf32;
    unsafe fn cmplt_ps(a: Self::Vf32, b: Self::Vf32) -> Self::Vf32;
    unsafe fn cmpeq_pd(a: Self::Vf64, b: Self::Vf64) -> Self::Vf64;
    unsafe fn cmpneq_pd(a: Self::Vf64, b: Self::Vf64) -> Self::Vf64;
    unsafe fn cmpge_pd(a: Self::Vf64, b: Self::Vf64) -> Self::Vf64;
    unsafe fn cmpgt_pd(a: Self::Vf64, b: Self::Vf64) -> Self::Vf64;
    unsafe fn cmple_pd(a: Self::Vf64, b: Self::Vf64) -> Self::Vf64;
    unsafe fn cmplt_pd(a: Self::Vf64, b: Self::Vf64) -> Self::Vf64;
    unsafe fn cvtepi32_ps(a: Self::Vi32) -> Self::Vf32;
    unsafe fn cvtps_epi32(a: Self::Vf32) -> Self::Vi32;
    unsafe fn floor_ps(a: Self::Vf32) -> Self::Vf32;
    unsafe fn floor_pd(a: Self::Vf64) -> Self::Vf64;
    unsafe fn fast_round_ps(a: Self::Vf32) -> Self::Vf32;
    unsafe fn fast_ceil_ps(a: Self::Vf32) -> Self::Vf32;
    unsafe fn fast_floor_ps(a: Self::Vf32) -> Self::Vf32;
    unsafe fn fmadd_ps(
        a: Self::Vf32,
        b: Self::Vf32,
        c: Self::Vf32
    ) -> Self::Vf32;
    unsafe fn fnmadd_ps(
        a: Self::Vf32,
        b: Self::Vf32,
        c: Self::Vf32
    ) -> Self::Vf32;
    unsafe fn fmadd_pd(
        a: Self::Vf64,
        b: Self::Vf64,
        c: Self::Vf64
    ) -> Self::Vf64;
    unsafe fn fnmadd_pd(
        a: Self::Vf64,
        b: Self::Vf64,
        c: Self::Vf64
    ) -> Self::Vf64;
    unsafe fn fmsub_ps(
        a: Self::Vf32,
        b: Self::Vf32,
        c: Self::Vf32
    ) -> Self::Vf32;
    unsafe fn fnmsub_ps(
        a: Self::Vf32,
        b: Self::Vf32,
        c: Self::Vf32
    ) -> Self::Vf32;
    unsafe fn fmsub_pd(
        a: Self::Vf64,
        b: Self::Vf64,
        c: Self::Vf64
    ) -> Self::Vf64;
    unsafe fn fnmsub_pd(
        a: Self::Vf64,
        b: Self::Vf64,
        c: Self::Vf64
    ) -> Self::Vf64;
    unsafe fn horizontal_add_ps(a: Self::Vf32) -> f32;
    unsafe fn horizontal_add_pd(a: Self::Vf64) -> f64;
    unsafe fn i32gather_epi32(arr: &[i32], index: Self::Vi32) -> Self::Vi32;
    unsafe fn i32gather_ps(arr: &[f32], index: Self::Vi32) -> Self::Vf32;
    unsafe fn load_ps(a: &f32) -> Self::Vf32;
    unsafe fn load_pd(a: &f64) -> Self::Vf64;
    unsafe fn load_epi32(a: &i32) -> Self::Vi32;
    unsafe fn load_epi64(a: &i64) -> Self::Vi64;
    unsafe fn loadu_ps(a: &f32) -> Self::Vf32;
    unsafe fn loadu_pd(a: &f64) -> Self::Vf64;
    unsafe fn loadu_epi32(a: &i32) -> Self::Vi32;
    unsafe fn loadu_epi64(a: &i64) -> Self::Vi64;
    unsafe fn maskload_epi32(mem_addr: &i32, mask: Self::Vi32) -> Self::Vi32;
    unsafe fn maskload_epi64(mem_addr: &i64, mask: Self::Vi64) -> Self::Vi64;
    unsafe fn maskload_ps(mem_addr: &f32, mask: Self::Vi32) -> Self::Vf32;
    unsafe fn maskload_pd(mem_addr: &f64, mask: Self::Vi64) -> Self::Vf64;
    unsafe fn store_ps(mem_addr: &mut f32, a: Self::Vf32);
    unsafe fn store_pd(mem_addr: &mut f64, a: Self::Vf64);
    unsafe fn store_epi32(mem_addr: &mut i32, a: Self::Vi32);
    unsafe fn store_epi64(mem_addr: &mut i64, a: Self::Vi64);
    unsafe fn storeu_ps(mem_addr: &mut f32, a: Self::Vf32);
    unsafe fn storeu_pd(mem_addr: &mut f64, a: Self::Vf64);
    unsafe fn storeu_epi32(mem_addr: &mut i32, a: Self::Vi32);
    unsafe fn storeu_epi64(mem_addr: &mut i64, a: Self::Vi64);
    unsafe fn maskstore_epi32(
        mem_addr: &mut i32,
        mask: Self::Vi32,
        a: Self::Vi32
    );
    unsafe fn maskstore_epi64(
        mem_addr: &mut i64,
        mask: Self::Vi64,
        a: Self::Vi64
    );
    unsafe fn maskstore_ps(mem_addr: &mut f32, mask: Self::Vi32, a: Self::Vf32);
    unsafe fn maskstore_pd(mem_addr: &mut f64, mask: Self::Vi64, a: Self::Vf64);
    unsafe fn max_epi32(a: Self::Vi32, b: Self::Vi32) -> Self::Vi32;
    unsafe fn min_epi32(a: Self::Vi32, b: Self::Vi32) -> Self::Vi32;
    unsafe fn max_ps(a: Self::Vf32, b: Self::Vf32) -> Self::Vf32;
    unsafe fn min_ps(a: Self::Vf32, b: Self::Vf32) -> Self::Vf32;
    unsafe fn max_pd(a: Self::Vf64, b: Self::Vf64) -> Self::Vf64;
    unsafe fn min_pd(a: Self::Vf64, b: Self::Vf64) -> Self::Vf64;
    unsafe fn mul_ps(a: Self::Vf32, b: Self::Vf32) -> Self::Vf32;
    unsafe fn mul_pd(a: Self::Vf64, b: Self::Vf64) -> Self::Vf64;
    unsafe fn mullo_epi32(a: Self::Vi32, b: Self::Vi32) -> Self::Vi32;
    unsafe fn not_epi32(a: Self::Vi32) -> Self::Vi32;
    unsafe fn not_epi64(a: Self::Vi64) -> Self::Vi64;
    unsafe fn or_epi32(a: Self::Vi32, b: Self::Vi32) -> Self::Vi32;
    unsafe fn or_epi64(a: Self::Vi64, b: Self::Vi64) -> Self::Vi64;
    unsafe fn or_ps(a: Self::Vf32, b: Self::Vf32) -> Self::Vf32;
    unsafe fn or_pd(a: Self::Vf64, b: Self::Vf64) -> Self::Vf64;
    unsafe fn rcp_ps(a: Self::Vf32) -> Self::Vf32;
    unsafe fn round_ps(a: Self::Vf32) -> Self::Vf32;
    unsafe fn round_pd(a: Self::Vf64) -> Self::Vf64;
    unsafe fn set1_epi32(a: i32) -> Self::Vi32;
    unsafe fn set1_epi64(a: i64) -> Self::Vi64;
    unsafe fn set1_ps(a: f32) -> Self::Vf32;
    unsafe fn set1_pd(a: f64) -> Self::Vf64;
    unsafe fn setzero_ps() -> Self::Vf32;
    unsafe fn setzero_pd() -> Self::Vf64;
    unsafe fn setzero_epi32() -> Self::Vi32;
    unsafe fn setzero_epi64() -> Self::Vi64;
    unsafe fn srai_epi32(a: Self::Vi32, amt_const: i32) -> Self::Vi32;
    unsafe fn srai_epi64(a: Self::Vi64, amt_const: i32) -> Self::Vi64;
    unsafe fn srli_epi32(a: Self::Vi32, amt_const: i32) -> Self::Vi32;
    unsafe fn slli_epi32(a: Self::Vi32, amt_const: i32) -> Self::Vi32;
    unsafe fn sra_epi32(a: Self::Vi32, amt: i32) -> Self::Vi32;
    unsafe fn srl_epi32(a: Self::Vi32, amt: i32) -> Self::Vi32;
    unsafe fn sll_epi32(a: Self::Vi32, amt: i32) -> Self::Vi32;
    unsafe fn sub_epi32(a: Self::Vi32, b: Self::Vi32) -> Self::Vi32;
    unsafe fn sub_ps(a: Self::Vf32, b: Self::Vf32) -> Self::Vf32;
    unsafe fn sub_pd(a: Self::Vf64, b: Self::Vf64) -> Self::Vf64;
    unsafe fn sqrt_ps(a: Self::Vf32) -> Self::Vf32;
    unsafe fn rsqrt_ps(a: Self::Vf32) -> Self::Vf32;
    unsafe fn sqrt_pd(a: Self::Vf64) -> Self::Vf64;
    unsafe fn rsqrt_pd(a: Self::Vf64) -> Self::Vf64;
    unsafe fn shuffle_epi32(a: Self::Vi32, imm8: i32) -> Self::Vi32;
    unsafe fn xor_epi32(a: Self::Vi32, b: Self::Vi32) -> Self::Vi32;
    unsafe fn xor_epi64(a: Self::Vi64, b: Self::Vi64) -> Self::Vi64;
    unsafe fn xor_ps(a: Self::Vf32, b: Self::Vf32) -> Self::Vf32;
    unsafe fn xor_pd(a: Self::Vf64, b: Self::Vf64) -> Self::Vf64;
}

The abstract SIMD trait, implemented by the Avx2, Sse41, Sse2, and Scalar backends. Code written against this trait compiles for any of them.
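As a hedged sketch of how the trait is used: write a function generic over S: Simd and call the associated functions on S, and the same body works for every backend. The function name and the formula below are illustrative, not part of the crate.

use simdeez::Simd;

// A minimal sketch: the same body runs on any backend that implements Simd.
// Evaluates the smoothstep polynomial 3t^2 - 2t^3 on each lane of one vector.
unsafe fn smoothstep_one<S: Simd>(t: S::Vf32) -> S::Vf32 {
    let three = S::set1_ps(3.0);
    let two = S::set1_ps(2.0);
    let t2 = S::mul_ps(t, t);  // t^2
    let t3 = S::mul_ps(t2, t); // t^3
    S::sub_ps(S::mul_ps(three, t2), S::mul_ps(two, t3))
}

Instantiating it with one of the implementors listed at the bottom of this page (Avx2, Sse41, Sse2, or Scalar) selects the concrete vector types and intrinsics.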

Associated Types

type Vi16: SimdSmallInt<Self::Vi16, i16>

Vector of i16s. Corresponds to __m128i when used with the Sse impl, __m256i when used with Avx2, or a single i16 when used with Scalar.

type Vi32: SimdSmallInt<Self::Vi32, i32>

Vector of i32s. Corresponds to __m128i when used with the Sse impl, __m256i when used with Avx2, or a single i32 when used with Scalar.

type Vi64: SimdBase<Self::Vi64, i64> + Not<Output = Self::Vi64>

Vector of i64s. Corresponds to __m128i when used with the Sse impl, __m256i when used with Avx2, or a single i64 when used with Scalar.

type Vf32: SimdFloat<Self::Vf32, f32>

Vector of f32s. Corresponds to __m128 when used with the Sse impl, __m256 when used with Avx2, or a single f32 when used with Scalar.

type Vf64: SimdFloat<Self::Vf64, f64>

Vector of f64s. Corresponds to __m128d when used with the Sse impl, __m256d when used with Avx2, or a single f64 when used with Scalar.

Associated Constants

const VF32_WIDTH: usize

The number of f32 lanes in a Vf32 vector (for example 8 with Avx2, 4 with the SSE implementations, 1 with Scalar). Necessary for writing lane-width-agnostic code; the other width constants below play the same role for their types, and a sketch follows after them.

const VF64_WIDTH: usize

const VI16_WIDTH: usize

const VI32_WIDTH: usize

const VI64_WIDTH: usize
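
A hedged sketch of what the width constants are for: stepping through a slice S::VF32_WIDTH elements at a time, so the same loop is correct whether a vector holds 1, 4, or 8 lanes. The function name is illustrative, and any tail shorter than a vector is assumed to be handled elsewhere (for example with the mask load/store functions further down).

use simdeez::Simd;

// Adds two f32 slices into out, one full vector at a time.
// Assumes b and out are at least as long as a; the caller handles any tail
// shorter than S::VF32_WIDTH.
unsafe fn add_slices<S: Simd>(a: &[f32], b: &[f32], out: &mut [f32]) {
    let mut i = 0;
    while i + S::VF32_WIDTH <= a.len() {
        let va = S::loadu_ps(&a[i]);
        let vb = S::loadu_ps(&b[i]);
        S::storeu_ps(&mut out[i], S::add_ps(va, vb));
        i += S::VF32_WIDTH;
    }
}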

Required methods

unsafe fn div_ps(a: Self::Vf32, b: Self::Vf32) -> Self::Vf32

unsafe fn div_pd(a: Self::Vf64, b: Self::Vf64) -> Self::Vf64

unsafe fn abs_ps(a: Self::Vf32) -> Self::Vf32

unsafe fn abs_pd(a: Self::Vf64) -> Self::Vf64

unsafe fn add_epi16(a: Self::Vi16, b: Self::Vi16) -> Self::Vi16

unsafe fn sub_epi16(a: Self::Vi16, b: Self::Vi16) -> Self::Vi16

unsafe fn mullo_epi16(a: Self::Vi16, b: Self::Vi16) -> Self::Vi16

unsafe fn add_epi32(a: Self::Vi32, b: Self::Vi32) -> Self::Vi32

unsafe fn add_ps(a: Self::Vf32, b: Self::Vf32) -> Self::Vf32

unsafe fn add_pd(a: Self::Vf64, b: Self::Vf64) -> Self::Vf64

unsafe fn and_epi32(a: Self::Vi32, b: Self::Vi32) -> Self::Vi32

unsafe fn and_epi64(a: Self::Vi64, b: Self::Vi64) -> Self::Vi64

unsafe fn andnot_ps(a: Self::Vf32, b: Self::Vf32) -> Self::Vf32

unsafe fn andnot_pd(a: Self::Vf64, b: Self::Vf64) -> Self::Vf64

unsafe fn andnot_epi32(a: Self::Vi32, b: Self::Vi32) -> Self::Vi32

unsafe fn andnot_epi64(a: Self::Vi64, b: Self::Vi64) -> Self::Vi64

unsafe fn blendv_epi32(
    a: Self::Vi32,
    b: Self::Vi32,
    mask: Self::Vi32
) -> Self::Vi32

Note: SSE2 selects b only when all mask bits are 1, while SSE41 and AVX2 check only the high bit. To maintain portability, ensure all mask bits are 1 when blending; the results of the comparison operations satisfy this.

unsafe fn blendv_epi64(
    a: Self::Vi64,
    b: Self::Vi64,
    mask: Self::Vi64
) -> Self::Vi64

Note: SSE2 selects b only when all mask bits are 1, while SSE41 and AVX2 check only the high bit. To maintain portability, ensure all mask bits are 1 when blending; the results of the comparison operations satisfy this.

unsafe fn blendv_ps(
    a: Self::Vf32,
    b: Self::Vf32,
    mask: Self::Vf32
) -> Self::Vf32

Note: SSE2 selects b only when all mask bits are 1, while SSE41 and AVX2 check only the high bit. To maintain portability, ensure all mask bits are 1 when blending; the results of the comparison operations satisfy this.

unsafe fn blendv_pd(
    a: Self::Vf64,
    b: Self::Vf64,
    mask: Self::Vf64
) -> Self::Vf64

Note: SSE2 selects b only when all mask bits are 1, while SSE41 and AVX2 check only the high bit. To maintain portability, ensure all mask bits are 1 when blending; the results of the comparison operations satisfy this.
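
To illustrate the portability note above, here is a hedged sketch that builds the mask from a comparison, whose true lanes have all bits set, before blending. The function name is illustrative.

use simdeez::Simd;

// Per-lane selection: take b where a < b, otherwise keep a, which (NaN edge
// cases aside) yields the per-lane larger value. The comparison produces
// all-ones in true lanes, satisfying every backend's blend convention.
unsafe fn select_larger<S: Simd>(a: S::Vf32, b: S::Vf32) -> S::Vf32 {
    let mask = S::cmplt_ps(a, b); // all bits set where a < b
    S::blendv_ps(a, b, mask)      // b where the mask is set, else a
}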

unsafe fn castps_epi32(a: Self::Vf32) -> Self::Vi32

unsafe fn castpd_epi64(a: Self::Vf64) -> Self::Vi64

unsafe fn castepi32_ps(a: Self::Vi32) -> Self::Vf32

unsafe fn castepi64_pd(a: Self::Vi64) -> Self::Vf64

unsafe fn castps_pd(a: Self::Vf32) -> Self::Vf64

unsafe fn castpd_ps(a: Self::Vf64) -> Self::Vf32

unsafe fn ceil_ps(a: Self::Vf32) -> Self::Vf32

unsafe fn ceil_pd(a: Self::Vf64) -> Self::Vf64

unsafe fn cmpeq_epi64(a: Self::Vi64, b: Self::Vi64) -> Self::Vi64

unsafe fn cmpneq_epi64(a: Self::Vi64, b: Self::Vi64) -> Self::Vi64

unsafe fn cmpge_epi64(a: Self::Vi64, b: Self::Vi64) -> Self::Vi64

unsafe fn cmpgt_epi64(a: Self::Vi64, b: Self::Vi64) -> Self::Vi64

unsafe fn cmple_epi64(a: Self::Vi64, b: Self::Vi64) -> Self::Vi64

unsafe fn cmplt_epi64(a: Self::Vi64, b: Self::Vi64) -> Self::Vi64

unsafe fn cmpeq_epi32(a: Self::Vi32, b: Self::Vi32) -> Self::Vi32

unsafe fn cmpneq_epi32(a: Self::Vi32, b: Self::Vi32) -> Self::Vi32

unsafe fn cmpge_epi32(a: Self::Vi32, b: Self::Vi32) -> Self::Vi32

unsafe fn cmpgt_epi32(a: Self::Vi32, b: Self::Vi32) -> Self::Vi32

unsafe fn cmple_epi32(a: Self::Vi32, b: Self::Vi32) -> Self::Vi32

unsafe fn cmplt_epi32(a: Self::Vi32, b: Self::Vi32) -> Self::Vi32

unsafe fn cmpeq_ps(a: Self::Vf32, b: Self::Vf32) -> Self::Vf32

unsafe fn cmpneq_ps(a: Self::Vf32, b: Self::Vf32) -> Self::Vf32

unsafe fn cmpge_ps(a: Self::Vf32, b: Self::Vf32) -> Self::Vf32

unsafe fn cmpgt_ps(a: Self::Vf32, b: Self::Vf32) -> Self::Vf32

unsafe fn cmple_ps(a: Self::Vf32, b: Self::Vf32) -> Self::Vf32

unsafe fn cmplt_ps(a: Self::Vf32, b: Self::Vf32) -> Self::Vf32

unsafe fn cmpeq_pd(a: Self::Vf64, b: Self::Vf64) -> Self::Vf64

unsafe fn cmpneq_pd(a: Self::Vf64, b: Self::Vf64) -> Self::Vf64

unsafe fn cmpge_pd(a: Self::Vf64, b: Self::Vf64) -> Self::Vf64

unsafe fn cmpgt_pd(a: Self::Vf64, b: Self::Vf64) -> Self::Vf64

unsafe fn cmple_pd(a: Self::Vf64, b: Self::Vf64) -> Self::Vf64

unsafe fn cmplt_pd(a: Self::Vf64, b: Self::Vf64) -> Self::Vf64

unsafe fn cvtepi32_ps(a: Self::Vi32) -> Self::Vf32

unsafe fn cvtps_epi32(a: Self::Vf32) -> Self::Vi32

Currently the Scalar implementation may give different results in some cases, depending on the active SSE rounding mode.
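
One hedged way to sidestep the rounding-mode caveat is to make the values integral yourself before converting, so the conversion no longer depends on the active mode (assuming the values fit in an i32). The function name is illustrative.

use simdeez::Simd;

// Floor first, then convert: the input to cvtps_epi32 is already integral,
// so the result does not depend on the current rounding mode.
unsafe fn floor_to_i32<S: Simd>(a: S::Vf32) -> S::Vi32 {
    S::cvtps_epi32(S::floor_ps(a))
}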

unsafe fn floor_ps(a: Self::Vf32) -> Self::Vf32

unsafe fn floor_pd(a: Self::Vf64) -> Self::Vf64

unsafe fn fast_round_ps(a: Self::Vf32) -> Self::Vf32

When using Sse2, fast_round uses a faster rounding routine that only works on floating-point values small enough to fit in an i32. This is a big performance boost if you don't need a complete round.

unsafe fn fast_ceil_ps(a: Self::Vf32) -> Self::Vf32

When using Sse2, fast_ceil uses a faster ceiling routine that only works on floating-point values small enough to fit in an i32. This is a big performance boost if you don't need a complete ceil.

unsafe fn fast_floor_ps(a: Self::Vf32) -> Self::Vf32

When using Sse2, fast_floor uses a faster version of floor that only works on floating-point values small enough to fit in an i32. This is a big performance boost if you don't need a complete floor.
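
A hedged example of the trade-off above: fast_floor_ps is attractive for values known to stay comfortably inside i32 range (noise-lattice or texture coordinates, say), while floor_ps remains the safe choice for arbitrary input. The function name is illustrative.

use simdeez::Simd;

// Fractional part of bounded inputs using the fast path: frac(x) = x - floor(x).
// Assumes |x| is small enough to fit in an i32; use floor_ps otherwise.
unsafe fn fract<S: Simd>(x: S::Vf32) -> S::Vf32 {
    S::sub_ps(x, S::fast_floor_ps(x))
}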

unsafe fn fmadd_ps(a: Self::Vf32, b: Self::Vf32, c: Self::Vf32) -> Self::Vf32

Actual FMA instructions are used when Avx2 is in use; otherwise a mul and an add emulate it, so you can always call the FMA functions and get the best performance in either case.

unsafe fn fnmadd_ps(a: Self::Vf32, b: Self::Vf32, c: Self::Vf32) -> Self::Vf32

Actual FMA instructions are used when Avx2 is in use; otherwise a mul and an add emulate it, so you can always call the FMA functions and get the best performance in either case.

unsafe fn fmadd_pd(a: Self::Vf64, b: Self::Vf64, c: Self::Vf64) -> Self::Vf64

Actual FMA instructions are used when Avx2 is in use; otherwise a mul and an add emulate it, so you can always call the FMA functions and get the best performance in either case.

unsafe fn fnmadd_pd(a: Self::Vf64, b: Self::Vf64, c: Self::Vf64) -> Self::Vf64

Actual FMA instructions are used when Avx2 is in use; otherwise a mul and an add emulate it, so you can always call the FMA functions and get the best performance in either case.

unsafe fn fmsub_ps(a: Self::Vf32, b: Self::Vf32, c: Self::Vf32) -> Self::Vf32

Actual FMA instructions are used when Avx2 is in use; otherwise a mul and a sub emulate it, so you can always call the FMA functions and get the best performance in either case.

unsafe fn fnmsub_ps(a: Self::Vf32, b: Self::Vf32, c: Self::Vf32) -> Self::Vf32

Actual FMA instructions are used when Avx2 is in use; otherwise a mul and a sub emulate it, so you can always call the FMA functions and get the best performance in either case.

unsafe fn fmsub_pd(a: Self::Vf64, b: Self::Vf64, c: Self::Vf64) -> Self::Vf64

Actual FMA instructions are used when Avx2 is in use; otherwise a mul and a sub emulate it, so you can always call the FMA functions and get the best performance in either case.

unsafe fn fnmsub_pd(a: Self::Vf64, b: Self::Vf64, c: Self::Vf64) -> Self::Vf64

Actual FMA instructions are used when Avx2 is in use; otherwise a mul and a sub emulate it, so you can always call the FMA functions and get the best performance in either case.
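
For reference, a hedged sketch of what the fused calls compute, assuming the usual FMA sign conventions, with the non-fused form of fmadd spelled out for comparison (the fused version differs only in performing a single rounding on Avx2). The function name is illustrative.

use simdeez::Simd;

// Fused multiply-add variants and their plain-arithmetic equivalents.
unsafe fn fma_identities<S: Simd>(a: S::Vf32, b: S::Vf32, c: S::Vf32) -> S::Vf32 {
    let fmadd = S::fmadd_ps(a, b, c);            // ~  a * b + c
    let _fmsub = S::fmsub_ps(a, b, c);           // ~  a * b - c
    let _fnmadd = S::fnmadd_ps(a, b, c);         // ~ -(a * b) + c
    let _manual = S::add_ps(S::mul_ps(a, b), c); // non-fused form of fmadd
    fmadd
}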

unsafe fn horizontal_add_ps(a: Self::Vf32) -> f32

Adds all lanes together. Distinct from h_add, which adds adjacent pairs.

unsafe fn horizontal_add_pd(a: Self::Vf64) -> f64

Adds all lanes together. Distinct from h_add, which adds adjacent pairs.
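
A hedged sketch of the usual pattern: accumulate with whole vectors inside the loop and collapse to a scalar once at the end. The function name is illustrative, and any tail shorter than a vector is assumed to be handled separately.

use simdeez::Simd;

// Sums an f32 slice, leaving any tail shorter than S::VF32_WIDTH to the caller.
unsafe fn sum<S: Simd>(data: &[f32]) -> f32 {
    let mut acc = S::setzero_ps();
    let mut i = 0;
    while i + S::VF32_WIDTH <= data.len() {
        acc = S::add_ps(acc, S::loadu_ps(&data[i]));
        i += S::VF32_WIDTH;
    }
    S::horizontal_add_ps(acc)
}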

unsafe fn i32gather_epi32(arr: &[i32], index: Self::Vi32) -> Self::Vi32

The Sse2 and Sse41 paths simulate the gather by falling back to scalar array accesses, because gather instructions don't exist until Avx2.

unsafe fn i32gather_ps(arr: &[f32], index: Self::Vi32) -> Self::Vf32

The Sse2 and Sse41 paths simulate the gather by falling back to scalar array accesses, because gather instructions don't exist until Avx2.
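
A hedged sketch of a gathered table lookup; the function name is illustrative, and the caller is responsible for keeping every lane of the index vector in bounds.

use simdeez::Simd;

// Fetches table[idx[lane]] for each lane. Hardware gather on Avx2,
// simulated with scalar loads on the SSE paths as noted above.
unsafe fn lookup<S: Simd>(table: &[f32], idx: S::Vi32) -> S::Vf32 {
    S::i32gather_ps(table, idx)
}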

unsafe fn load_ps(a: &f32) -> Self::Vf32

unsafe fn load_pd(a: &f64) -> Self::Vf64

unsafe fn load_epi32(a: &i32) -> Self::Vi32

unsafe fn load_epi64(a: &i64) -> Self::Vi64

unsafe fn loadu_ps(a: &f32) -> Self::Vf32

unsafe fn loadu_pd(a: &f64) -> Self::Vf64

unsafe fn loadu_epi32(a: &i32) -> Self::Vi32

unsafe fn loadu_epi64(a: &i64) -> Self::Vi64

unsafe fn maskload_epi32(mem_addr: &i32, mask: Self::Vi32) -> Self::Vi32

Note: SSE2 and SSE4 load a lane when mask[i] is nonzero, whereas AVX2 loads only when the high bit of the lane is set. To ensure portability, make sure the high bit of every active lane is set.

unsafe fn maskload_epi64(mem_addr: &i64, mask: Self::Vi64) -> Self::Vi64

Note: SSE2 and SSE4 load a lane when mask[i] is nonzero, whereas AVX2 loads only when the high bit of the lane is set. To ensure portability, make sure the high bit of every active lane is set.

unsafe fn maskload_ps(mem_addr: &f32, mask: Self::Vi32) -> Self::Vf32

Note: SSE2 and SSE4 load a lane when mask[i] is nonzero, whereas AVX2 loads only when the high bit of the lane is set. To ensure portability, make sure the high bit of every active lane is set.

unsafe fn maskload_pd(mem_addr: &f64, mask: Self::Vi64) -> Self::Vf64

Note: SSE2 and SSE4 load a lane when mask[i] is nonzero, whereas AVX2 loads only when the high bit of the lane is set. To ensure portability, make sure the high bit of every active lane is set.
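
A hedged sketch of building a portable mask for a masked load: every bit of an active lane is set, which satisfies both the nonzero and high-bit conventions above, and inactive lanes are expected to come back as zero, as the AVX2 intrinsic behaves. The function name is illustrative, and the slice is assumed to be at least a full vector long so the load itself is always in bounds.

use simdeez::Simd;

// Loads only the first `count` lanes from data, zeroing the rest.
// Assumes data.len() >= S::VF32_WIDTH and count <= S::VF32_WIDTH.
unsafe fn load_first<S: Simd>(data: &[f32], count: usize) -> S::Vf32 {
    let mut mask = vec![0i32; S::VF32_WIDTH];
    for lane in 0..count {
        mask[lane] = -1; // all bits set => active on every backend
    }
    S::maskload_ps(&data[0], S::loadu_epi32(&mask[0]))
}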

unsafe fn store_ps(mem_addr: &mut f32, a: Self::Vf32)

unsafe fn store_pd(mem_addr: &mut f64, a: Self::Vf64)

unsafe fn store_epi32(mem_addr: &mut i32, a: Self::Vi32)

unsafe fn store_epi64(mem_addr: &mut i64, a: Self::Vi64)

unsafe fn storeu_ps(mem_addr: &mut f32, a: Self::Vf32)

unsafe fn storeu_pd(mem_addr: &mut f64, a: Self::Vf64)

unsafe fn storeu_epi32(mem_addr: &mut i32, a: Self::Vi32)

unsafe fn storeu_epi64(mem_addr: &mut i64, a: Self::Vi64)

unsafe fn maskstore_epi32(mem_addr: &mut i32, mask: Self::Vi32, a: Self::Vi32)

Note: SSE2 and SSE4 store a lane when mask[i] is nonzero, whereas AVX2 stores only when the high bit of the lane is set. To ensure portability, make sure the high bit of every active lane is set.

unsafe fn maskstore_epi64(mem_addr: &mut i64, mask: Self::Vi64, a: Self::Vi64)

Note: SSE2 and SSE4 store a lane when mask[i] is nonzero, whereas AVX2 stores only when the high bit of the lane is set. To ensure portability, make sure the high bit of every active lane is set.

unsafe fn maskstore_ps(mem_addr: &mut f32, mask: Self::Vi32, a: Self::Vf32)

Note: SSE2 and SSE4 store a lane when mask[i] is nonzero, whereas AVX2 stores only when the high bit of the lane is set. To ensure portability, make sure the high bit of every active lane is set.

unsafe fn maskstore_pd(mem_addr: &mut f64, mask: Self::Vi64, a: Self::Vf64)

Note: SSE2 and SSE4 store a lane when mask[i] is nonzero, whereas AVX2 stores only when the high bit of the lane is set. To ensure portability, make sure the high bit of every active lane is set.
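
The mirror of the masked-load sketch above: write back only the first `count` lanes and leave the remaining elements of the slice untouched. Same assumptions and the same all-bits-set mask convention; the function name is illustrative.

use simdeez::Simd;

// Stores only the first `count` lanes of v into out.
// Assumes out.len() >= S::VF32_WIDTH and count <= S::VF32_WIDTH.
unsafe fn store_first<S: Simd>(out: &mut [f32], count: usize, v: S::Vf32) {
    let mut mask = vec![0i32; S::VF32_WIDTH];
    for lane in 0..count {
        mask[lane] = -1; // high bit (and every other bit) set => active
    }
    S::maskstore_ps(&mut out[0], S::loadu_epi32(&mask[0]), v);
}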

unsafe fn max_epi32(a: Self::Vi32, b: Self::Vi32) -> Self::Vi32

unsafe fn min_epi32(a: Self::Vi32, b: Self::Vi32) -> Self::Vi32

unsafe fn max_ps(a: Self::Vf32, b: Self::Vf32) -> Self::Vf32

unsafe fn min_ps(a: Self::Vf32, b: Self::Vf32) -> Self::Vf32

unsafe fn max_pd(a: Self::Vf64, b: Self::Vf64) -> Self::Vf64

unsafe fn min_pd(a: Self::Vf64, b: Self::Vf64) -> Self::Vf64

unsafe fn mul_ps(a: Self::Vf32, b: Self::Vf32) -> Self::Vf32

unsafe fn mul_pd(a: Self::Vf64, b: Self::Vf64) -> Self::Vf64

unsafe fn mullo_epi32(a: Self::Vi32, b: Self::Vi32) -> Self::Vi32

Mullo is implemented for Sse2 by combining other Sse2 operations.

unsafe fn not_epi32(a: Self::Vi32) -> Self::Vi32

unsafe fn not_epi64(a: Self::Vi64) -> Self::Vi64

unsafe fn or_epi32(a: Self::Vi32, b: Self::Vi32) -> Self::Vi32

unsafe fn or_epi64(a: Self::Vi64, b: Self::Vi64) -> Self::Vi64

unsafe fn or_ps(a: Self::Vf32, b: Self::Vf32) -> Self::Vf32

unsafe fn or_pd(a: Self::Vf64, b: Self::Vf64) -> Self::Vf64

unsafe fn rcp_ps(a: Self::Vf32) -> Self::Vf32

unsafe fn round_ps(a: Self::Vf32) -> Self::Vf32

Round is implemented for Sse2 by combining other Sse2 operations.

unsafe fn round_pd(a: Self::Vf64) -> Self::Vf64

unsafe fn set1_epi32(a: i32) -> Self::Vi32

unsafe fn set1_epi64(a: i64) -> Self::Vi64

unsafe fn set1_ps(a: f32) -> Self::Vf32

unsafe fn set1_pd(a: f64) -> Self::Vf64

unsafe fn setzero_ps() -> Self::Vf32

unsafe fn setzero_pd() -> Self::Vf64

unsafe fn setzero_epi32() -> Self::Vi32

unsafe fn setzero_epi64() -> Self::Vi64

unsafe fn srai_epi32(a: Self::Vi32, amt_const: i32) -> Self::Vi32

amt_const must be a compile-time constant

unsafe fn srai_epi64(a: Self::Vi64, amt_const: i32) -> Self::Vi64

amt_const must be a compile-time constant

unsafe fn srli_epi32(a: Self::Vi32, amt_const: i32) -> Self::Vi32

amt_const must be a compile-time constant

unsafe fn slli_epi32(a: Self::Vi32, amt_const: i32) -> Self::Vi32

amt_const must be a compile-time constant

unsafe fn sra_epi32(a: Self::Vi32, amt: i32) -> Self::Vi32

amt does not have to be a constant, but may be slower than the srai version

unsafe fn srl_epi32(a: Self::Vi32, amt: i32) -> Self::Vi32

amt does not have to be a constant, but may be slower than the srli version

unsafe fn sll_epi32(a: Self::Vi32, amt: i32) -> Self::Vi32

amt does not have to be a constant, but may be slower than the slli version
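
A hedged illustration of the constant-versus-variable distinction in the shift notes above; the function name is illustrative.

use simdeez::Simd;

// srai_epi32 needs the shift amount at compile time; sra_epi32 accepts a
// runtime amount at a possible cost in speed.
unsafe fn shift_examples<S: Simd>(a: S::Vi32, runtime_amt: i32) -> (S::Vi32, S::Vi32) {
    let by_const = S::srai_epi32(a, 2);        // arithmetic shift right by 2
    let by_var = S::sra_epi32(a, runtime_amt); // amount chosen at run time
    (by_const, by_var)
}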

unsafe fn sub_epi32(a: Self::Vi32, b: Self::Vi32) -> Self::Vi32

unsafe fn sub_ps(a: Self::Vf32, b: Self::Vf32) -> Self::Vf32

unsafe fn sub_pd(a: Self::Vf64, b: Self::Vf64) -> Self::Vf64

unsafe fn sqrt_ps(a: Self::Vf32) -> Self::Vf32

unsafe fn rsqrt_ps(a: Self::Vf32) -> Self::Vf32

unsafe fn sqrt_pd(a: Self::Vf64) -> Self::Vf64

unsafe fn rsqrt_pd(a: Self::Vf64) -> Self::Vf64

unsafe fn shuffle_epi32(a: Self::Vi32, imm8: i32) -> Self::Vi32

unsafe fn xor_epi32(a: Self::Vi32, b: Self::Vi32) -> Self::Vi32

unsafe fn xor_epi64(a: Self::Vi64, b: Self::Vi64) -> Self::Vi64

unsafe fn xor_ps(a: Self::Vf32, b: Self::Vf32) -> Self::Vf32

unsafe fn xor_pd(a: Self::Vf64, b: Self::Vf64) -> Self::Vf64

Implementors

impl Simd for Avx2

type Vi16 = I16x16

type Vi32 = I32x8

type Vf32 = F32x8

type Vf64 = F64x4

type Vi64 = I64x4

impl Simd for Scalar

type Vi16 = I16x1

type Vi32 = I32x1

type Vf32 = F32x1

type Vf64 = F64x1

type Vi64 = I64x1

impl Simd for Sse2

type Vi16 = I16x8

type Vi32 = I32x4

type Vf32 = F32x4

type Vf64 = F64x2

type Vi64 = I64x2

impl Simd for Sse41

type Vi16 = I16x8

type Vi32 = I32x4_41

type Vf32 = F32x4

type Vf64 = F64x2

type Vi64 = I64x2_41
