#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))]
use super::config::SIMDInstructionSet;
#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))]
use super::generic::{impl_SIMDArgMinMax, impl_SIMDInit_Int};
#[cfg(any(
target_arch = "x86",
target_arch = "x86_64",
all(target_arch = "arm", feature = "nightly_simd"),
target_arch = "aarch64",
))]
use super::generic::{SIMDArgMinMax, SIMDInit, SIMDOps};
#[cfg(any(
target_arch = "x86",
target_arch = "x86_64",
all(target_arch = "arm", feature = "nightly_simd"),
target_arch = "aarch64",
))]
use crate::SCALAR;
#[cfg(target_arch = "aarch64")]
use std::arch::aarch64::*;
#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
#[cfg(any(
target_arch = "x86",
target_arch = "x86_64",
all(target_arch = "arm", feature = "nightly_simd"),
target_arch = "aarch64",
))]
use super::super::dtype_strategy::Int;
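// Indices are tracked in i64 lanes, so the largest index the SIMD kernels can
// represent is i64::MAX.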
#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))]
const MAX_INDEX: usize = i64::MAX as usize;
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
mod avx2 {
use super::super::config::AVX2;
use super::*;
const LANE_SIZE: usize = AVX2::<Int>::LANE_SIZE_64;
impl SIMDOps<i64, __m256i, __m256i, LANE_SIZE> for AVX2<Int> {
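// The initial index vector ([0, 1, 2, 3]) and the per-iteration increment are built at
// compile time by transmuting plain i64 arrays into the AVX2 register type.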
const INITIAL_INDEX: __m256i = unsafe { std::mem::transmute([0i64, 1i64, 2i64, 3i64]) };
const INDEX_INCREMENT: __m256i =
unsafe { std::mem::transmute([LANE_SIZE as i64; LANE_SIZE]) };
const MAX_INDEX: usize = MAX_INDEX;
#[inline(always)]
unsafe fn _reg_to_arr(reg: __m256i) -> [i64; LANE_SIZE] {
std::mem::transmute::<__m256i, [i64; LANE_SIZE]>(reg)
}
#[inline(always)]
unsafe fn _mm_loadu(data: *const i64) -> __m256i {
_mm256_loadu_si256(data as *const __m256i)
}
#[inline(always)]
unsafe fn _mm_add(a: __m256i, b: __m256i) -> __m256i {
_mm256_add_epi64(a, b)
}
#[inline(always)]
unsafe fn _mm_cmpgt(a: __m256i, b: __m256i) -> __m256i {
_mm256_cmpgt_epi64(a, b)
}
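// AVX2 has no 64-bit "compare less-than" intrinsic; swapping the operands of cmpgt
// yields the same result.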
#[inline(always)]
unsafe fn _mm_cmplt(a: __m256i, b: __m256i) -> __m256i {
_mm256_cmpgt_epi64(b, a)
}
#[inline(always)]
unsafe fn _mm_blendv(a: __m256i, b: __m256i, mask: __m256i) -> __m256i {
_mm256_blendv_epi8(a, b, mask)
}
}
impl_SIMDInit_Int!(i64, __m256i, __m256i, LANE_SIZE, AVX2<Int>);
impl_SIMDArgMinMax!(
i64,
__m256i,
__m256i,
LANE_SIZE,
SCALAR<Int>,
AVX2<Int>,
"avx2"
);
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
mod sse {
use super::super::config::SSE;
use super::*;
const LANE_SIZE: usize = SSE::<Int>::LANE_SIZE_64;
impl SIMDOps<i64, __m128i, __m128i, LANE_SIZE> for SSE<Int> {
const INITIAL_INDEX: __m128i = unsafe { std::mem::transmute([0i64, 1i64]) };
const INDEX_INCREMENT: __m128i =
unsafe { std::mem::transmute([LANE_SIZE as i64; LANE_SIZE]) };
const MAX_INDEX: usize = MAX_INDEX;
#[inline(always)]
unsafe fn _reg_to_arr(reg: __m128i) -> [i64; LANE_SIZE] {
std::mem::transmute::<__m128i, [i64; LANE_SIZE]>(reg)
}
#[inline(always)]
unsafe fn _mm_loadu(data: *const i64) -> __m128i {
_mm_loadu_si128(data as *const __m128i)
}
#[inline(always)]
unsafe fn _mm_add(a: __m128i, b: __m128i) -> __m128i {
_mm_add_epi64(a, b)
}
#[inline(always)]
unsafe fn _mm_cmpgt(a: __m128i, b: __m128i) -> __m128i {
_mm_cmpgt_epi64(a, b)
}
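// SSE4.2 provides _mm_cmpgt_epi64 but no less-than counterpart; swap the operands instead.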
#[inline(always)]
unsafe fn _mm_cmplt(a: __m128i, b: __m128i) -> __m128i {
_mm_cmpgt_epi64(b, a)
}
#[inline(always)]
unsafe fn _mm_blendv(a: __m128i, b: __m128i, mask: __m128i) -> __m128i {
_mm_blendv_epi8(a, b, mask)
}
}
impl_SIMDInit_Int!(i64, __m128i, __m128i, LANE_SIZE, SSE<Int>);
impl_SIMDArgMinMax!(
i64,
__m128i,
__m128i,
LANE_SIZE,
SCALAR<Int>,
SSE<Int>,
"sse4.2"
);
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[cfg(feature = "nightly_simd")]
mod avx512 {
use super::super::config::AVX512;
use super::*;
const LANE_SIZE: usize = AVX512::<Int>::LANE_SIZE_64;
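// AVX512 comparisons produce a k-mask (one bit per lane) instead of a vector register,
// hence the u8 mask type for the 8 i64 lanes.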
impl SIMDOps<i64, __m512i, u8, LANE_SIZE> for AVX512<Int> {
const INITIAL_INDEX: __m512i =
unsafe { std::mem::transmute([0i64, 1i64, 2i64, 3i64, 4i64, 5i64, 6i64, 7i64]) };
const INDEX_INCREMENT: __m512i =
unsafe { std::mem::transmute([LANE_SIZE as i64; LANE_SIZE]) };
const MAX_INDEX: usize = MAX_INDEX;
#[inline(always)]
unsafe fn _reg_to_arr(reg: __m512i) -> [i64; LANE_SIZE] {
std::mem::transmute::<__m512i, [i64; LANE_SIZE]>(reg)
}
#[inline(always)]
unsafe fn _mm_loadu(data: *const i64) -> __m512i {
_mm512_loadu_epi64(data as *const i64)
}
#[inline(always)]
unsafe fn _mm_add(a: __m512i, b: __m512i) -> __m512i {
_mm512_add_epi64(a, b)
}
#[inline(always)]
unsafe fn _mm_cmpgt(a: __m512i, b: __m512i) -> u8 {
_mm512_cmpgt_epi64_mask(a, b)
}
#[inline(always)]
unsafe fn _mm_cmplt(a: __m512i, b: __m512i) -> u8 {
_mm512_cmpgt_epi64_mask(b, a)
}
#[inline(always)]
unsafe fn _mm_blendv(a: __m512i, b: __m512i, mask: u8) -> __m512i {
_mm512_mask_blend_epi64(mask, a, b)
}
}
impl_SIMDInit_Int!(i64, __m512i, u8, LANE_SIZE, AVX512<Int>);
impl_SIMDArgMinMax!(
i64,
__m512i,
u8,
LANE_SIZE,
SCALAR<Int>,
AVX512<Int>,
"avx512f"
);
}
#[cfg(target_arch = "arm")]
#[cfg(feature = "nightly_simd")]
mod neon {
use super::super::config::NEON;
use super::super::generic::{unimpl_SIMDArgMinMax, unimpl_SIMDInit, unimpl_SIMDOps};
use super::*;
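// The 64-bit NEON comparison intrinsics (vcgtq_s64 / vcltq_s64) are only available on
// AArch64, so on 32-bit ARM the i64 implementation is stubbed out with the
// unimplemented placeholders below.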
unimpl_SIMDOps!(i64, usize, NEON<Int>);
unimpl_SIMDInit!(i64, usize, NEON<Int>);
unimpl_SIMDArgMinMax!(i64, usize, SCALAR<Int>, NEON<Int>);
}
#[cfg(target_arch = "aarch64")] mod neon {
use super::super::config::NEON;
use super::*;
const LANE_SIZE: usize = NEON::<Int>::LANE_SIZE_64;
impl SIMDOps<i64, int64x2_t, uint64x2_t, LANE_SIZE> for NEON<Int> {
const INITIAL_INDEX: int64x2_t = unsafe { std::mem::transmute([0i64, 1i64]) };
const INDEX_INCREMENT: int64x2_t =
unsafe { std::mem::transmute([LANE_SIZE as i64; LANE_SIZE]) };
const MAX_INDEX: usize = MAX_INDEX;
#[inline(always)]
unsafe fn _reg_to_arr(reg: int64x2_t) -> [i64; LANE_SIZE] {
std::mem::transmute::<int64x2_t, [i64; LANE_SIZE]>(reg)
}
#[inline(always)]
unsafe fn _mm_loadu(data: *const i64) -> int64x2_t {
vld1q_s64(data)
}
#[inline(always)]
unsafe fn _mm_add(a: int64x2_t, b: int64x2_t) -> int64x2_t {
vaddq_s64(a, b)
}
#[inline(always)]
unsafe fn _mm_cmpgt(a: int64x2_t, b: int64x2_t) -> uint64x2_t {
vcgtq_s64(a, b)
}
#[inline(always)]
unsafe fn _mm_cmplt(a: int64x2_t, b: int64x2_t) -> uint64x2_t {
vcltq_s64(a, b)
}
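// vbslq_s64 selects bits from the second operand where the mask is set and from the
// third operand otherwise, matching the x86 blendv semantics used above.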
#[inline(always)]
unsafe fn _mm_blendv(a: int64x2_t, b: int64x2_t, mask: uint64x2_t) -> int64x2_t {
vbslq_s64(mask, b, a)
}
}
impl_SIMDInit_Int!(i64, int64x2_t, uint64x2_t, LANE_SIZE, NEON<Int>);
impl_SIMDArgMinMax!(
i64,
int64x2_t,
uint64x2_t,
LANE_SIZE,
SCALAR<Int>,
NEON<Int>,
"neon"
);
}
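// The tests below run each available SIMD implementation against the scalar reference.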
#[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))]
#[cfg(test)]
mod tests {
use rstest::rstest;
use rstest_reuse::{self, *};
use std::marker::PhantomData;
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[cfg(feature = "nightly_simd")]
use crate::simd::config::AVX512;
#[cfg(target_arch = "aarch64")]
use crate::simd::config::NEON;
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
use crate::simd::config::{AVX2, SSE};
use crate::{Int, SIMDArgMinMax, SCALAR};
use super::super::test_utils::{
test_first_index_identical_values_argminmax, test_return_same_result_argminmax,
};
use dev_utils::utils;
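// Generates a random i64 array covering the full i64 value range.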
fn get_array_i64(n: usize) -> Vec<i64> {
utils::SampleUniformFullRange::get_random_array(n)
}
const SCALAR_STRATEGY: SCALAR<Int> = SCALAR {
_dtype_strategy: PhantomData::<Int>,
};
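// Each template case pairs a SIMD implementation with a runtime check for the CPU
// feature it requires; the tests below skip themselves when that feature is absent.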
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[template]
#[rstest]
#[case::sse(SSE {_dtype_strategy: PhantomData::<Int>}, is_x86_feature_detected!("sse4.2"))]
#[case::avx2(AVX2 {_dtype_strategy: PhantomData::<Int>}, is_x86_feature_detected!("avx2"))]
#[cfg_attr(feature = "nightly_simd", case::avx512(AVX512 {_dtype_strategy: PhantomData::<Int>}, is_x86_feature_detected!("avx512f")))]
fn simd_implementations<T, SIMDV, SIMDM, const LANE_SIZE: usize>(
#[case] simd: T,
#[case] simd_available: bool,
) {
}
#[cfg(target_arch = "aarch64")]
#[template]
#[rstest]
#[case::neon(NEON {_dtype_strategy: PhantomData::<Int>}, true)]
fn simd_implementations<T, SIMDV, SIMDM, const LANE_SIZE: usize>(
#[case] simd: T,
#[case] simd_available: bool,
) {
}
#[apply(simd_implementations)]
fn test_first_index_is_returned_when_identical_values_found<
T,
SIMDV,
SIMDM,
const LANE_SIZE: usize,
>(
#[case] simd: T,
#[case] simd_available: bool,
) where
T: SIMDArgMinMax<i64, SIMDV, SIMDM, LANE_SIZE, SCALAR<Int>>,
SIMDV: Copy,
SIMDM: Copy,
{
if !simd_available {
return;
}
test_first_index_identical_values_argminmax(SCALAR_STRATEGY, simd);
}
#[apply(simd_implementations)]
fn test_return_same_result<T, SIMDV, SIMDM, const LANE_SIZE: usize>(
#[case] simd: T,
#[case] simd_available: bool,
) where
T: SIMDArgMinMax<i64, SIMDV, SIMDM, LANE_SIZE, SCALAR<Int>>,
SIMDV: Copy,
SIMDM: Copy,
{
if !simd_available {
return;
}
test_return_same_result_argminmax(get_array_i64, SCALAR_STRATEGY, simd);
}
}