#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
/// Multiplies `a` by `b` lane-wise and adds `0.5` to every product using a
/// single fused multiply-add over eight `f32` lanes.
///
/// The result of each lane is `a[i] * b[i] + 0.5`.
///
/// NOTE(review): the `+ 0.5` bias presumably prepares the value for a later
/// truncating float-to-int conversion — confirm at call sites.
#[inline(always)]
pub(crate) fn _mm256_mul_round_ps(a: __m256, b: __m256) -> __m256 {
    // SAFETY: the intrinsics below need AVX and FMA at runtime. Nothing in this
    // wrapper checks for them; callers are assumed to have verified CPU
    // support beforehand — TODO confirm.
    unsafe {
        let half = _mm256_set1_ps(0.5f32);
        _mm256_fmadd_ps(a, b, half)
    }
}
/// Multiplies `a` by `b` lane-wise and adds `0.5` to every product using a
/// single fused multiply-add over four `f32` lanes.
///
/// The result of each lane is `a[i] * b[i] + 0.5`.
///
/// NOTE(review): the `+ 0.5` bias presumably prepares the value for a later
/// truncating float-to-int conversion — confirm at call sites.
#[inline(always)]
pub(crate) fn _mm_mul_round_ps(a: __m128, b: __m128) -> __m128 {
    // SAFETY: `_mm_fmadd_ps` needs FMA at runtime. Nothing in this wrapper
    // checks for it; callers are assumed to have verified CPU support
    // beforehand — TODO confirm.
    unsafe {
        let half = _mm_set1_ps(0.5f32);
        _mm_fmadd_ps(a, b, half)
    }
}
/// Multiplies each of the eight 32-bit integer lanes of `v` by three.
///
/// Implemented as `(v << 1) + v` with a shift and an add instead of an
/// integer multiply; the lane-wise add wraps on overflow.
#[inline(always)]
pub(crate) fn _mm256_mul_by_3_epi32(v: __m256i) -> __m256i {
    // SAFETY: the intrinsics below need AVX2 at runtime. Nothing in this
    // wrapper checks for it; callers are assumed to have verified CPU support
    // beforehand — TODO confirm.
    unsafe {
        let doubled = _mm256_slli_epi32::<1>(v);
        _mm256_add_epi32(doubled, v)
    }
}
/// Computes `a - b * c` lane-wise over eight `f32` lanes.
///
/// The const parameter `FMA` selects the implementation at compile time:
/// * `true`  — one fused negated multiply-add (`_mm256_fnmadd_ps`);
/// * `false` — a separate multiply followed by a subtract.
#[inline(always)]
pub(crate) fn _mm256_opt_fnmlaf_ps<const FMA: bool>(a: __m256, b: __m256, c: __m256) -> __m256 {
    // SAFETY: the fused path needs FMA and the fallback needs AVX at runtime.
    // Nothing in this wrapper checks for them; callers are assumed to pick
    // `FMA` to match the verified CPU support — TODO confirm.
    match FMA {
        true => unsafe { _mm256_fnmadd_ps(b, c, a) },
        false => unsafe {
            let product = _mm256_mul_ps(b, c);
            _mm256_sub_ps(a, product)
        },
    }
}
/// Computes `a + b * c` lane-wise over eight `f32` lanes.
///
/// The const parameter `FMA` selects the implementation at compile time:
/// * `true`  — one fused multiply-add (`_mm256_fmadd_ps`);
/// * `false` — a separate multiply followed by an add.
#[inline(always)]
pub(crate) fn _mm256_opt_fmlaf_ps<const FMA: bool>(a: __m256, b: __m256, c: __m256) -> __m256 {
    // SAFETY (applies to both paths): the fallback needs AVX and the fused path
    // needs FMA at runtime. Nothing in this wrapper checks for them; callers
    // are assumed to pick `FMA` to match the verified CPU support — TODO confirm.
    if !FMA {
        return unsafe {
            let product = _mm256_mul_ps(b, c);
            _mm256_add_ps(a, product)
        };
    }
    unsafe { _mm256_fmadd_ps(b, c, a) }
}
/// Computes `b * c - a` lane-wise over eight `f32` lanes.
///
/// The const parameter `FMA` selects the implementation at compile time:
/// * `true`  — one fused multiply-subtract (`_mm256_fmsub_ps`);
/// * `false` — a separate multiply followed by a subtract.
///
/// Note the operand order: the accumulator `a` is the value being subtracted.
#[inline(always)]
pub(crate) fn _mm256_opt_fnmlsf_ps<const FMA: bool>(a: __m256, b: __m256, c: __m256) -> __m256 {
    // SAFETY: the fused path needs FMA and the fallback needs AVX at runtime.
    // Nothing in this wrapper checks for them; callers are assumed to pick
    // `FMA` to match the verified CPU support — TODO confirm.
    match FMA {
        true => unsafe { _mm256_fmsub_ps(b, c, a) },
        false => unsafe {
            let product = _mm256_mul_ps(b, c);
            _mm256_sub_ps(product, a)
        },
    }
}