#![cfg(target_feature = "avx")]
use super::*;
/// Lanewise `a + b` with `f64` lanes.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn add_m256d(a: m256d, b: m256d) -> m256d {
m256d(unsafe { _mm256_add_pd(a.0, b.0) })
}
/// Lanewise `a + b` with `f32` lanes.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn add_m256(a: m256, b: m256) -> m256 {
m256(unsafe { _mm256_add_ps(a.0, b.0) })
}
/// Alternately subtract and add `f64` lanes: even-indexed lanes subtract, odd-indexed lanes add.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn addsub_m256d(a: m256d, b: m256d) -> m256d {
m256d(unsafe { _mm256_addsub_pd(a.0, b.0) })
}
/// Alternately subtract and add `f32` lanes: even-indexed lanes subtract, odd-indexed lanes add.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn addsub_m256(a: m256, b: m256) -> m256 {
m256(unsafe { _mm256_addsub_ps(a.0, b.0) })
}
/// Bitwise `a & b` on the bits of the `f64` lanes.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn bitand_m256d(a: m256d, b: m256d) -> m256d {
m256d(unsafe { _mm256_and_pd(a.0, b.0) })
}
/// Bitwise `a & b` on the bits of the `f32` lanes.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn bitand_m256(a: m256, b: m256) -> m256 {
m256(unsafe { _mm256_and_ps(a.0, b.0) })
}
/// Bitwise `(!a) & b` on the bits of the `f64` lanes (note: `a` is the inverted operand).
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn bitandnot_m256d(a: m256d, b: m256d) -> m256d {
m256d(unsafe { _mm256_andnot_pd(a.0, b.0) })
}
/// Bitwise `(!a) & b` on the bits of the `f32` lanes (note: `a` is the inverted operand).
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn bitandnot_m256(a: m256, b: m256) -> m256 {
m256(unsafe { _mm256_andnot_ps(a.0, b.0) })
}
/// Blends the `f64` lanes of `a` and `b` according to the immediate.
///
/// For each lane `i` in `0..4`, bit `i` of `IMM` selects the output:
/// `0` takes the lane from `a`, `1` takes it from `b`.
// Fix: this function was missing the `#[must_use]`, `#[inline(always)]`, and
// docs.rs cfg attributes that every other wrapper in this module carries.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn blend_m256d<const IMM: i32>(a: m256d, b: m256d) -> m256d {
m256d(unsafe { _mm256_blend_pd(a.0, b.0, IMM) })
}
/// Blends the `f32` lanes of `a` and `b` according to the immediate.
///
/// For each lane `i` in `0..8`, bit `i` of `IMM` selects the output:
/// `0` takes the lane from `a`, `1` takes it from `b`.
// Fix: this function was missing the `#[must_use]`, `#[inline(always)]`, and
// docs.rs cfg attributes that every other wrapper in this module carries.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn blend_m256<const IMM: i32>(a: m256, b: m256) -> m256 {
m256(unsafe { _mm256_blend_ps(a.0, b.0, IMM) })
}
/// Blend `f64` lanes at runtime: where `mask`'s lane sign bit is set, take `b`, else `a`.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn blend_varying_m256d(a: m256d, b: m256d, mask: m256d) -> m256d {
m256d(unsafe { _mm256_blendv_pd(a.0, b.0, mask.0) })
}
/// Blend `f32` lanes at runtime: where `mask`'s lane sign bit is set, take `b`, else `a`.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn blend_varying_m256(a: m256, b: m256, mask: m256) -> m256 {
m256(unsafe { _mm256_blendv_ps(a.0, b.0, mask.0) })
}
/// Loads the `m128d` and duplicates it into both 128-bit halves of an `m256d`.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn load_m128d_splat_m256d(a: &m128d) -> m256d {
m256d(unsafe { _mm256_broadcast_pd(&a.0) })
}
/// Loads the `m128` and duplicates it into both 128-bit halves of an `m256`.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn load_m128_splat_m256(a: &m128) -> m256 {
m256(unsafe { _mm256_broadcast_ps(&a.0) })
}
/// Loads the `f64` and splats it to all four lanes of an `m256d`.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn load_f64_splat_m256d(a: &f64) -> m256d {
m256d(unsafe { _mm256_broadcast_sd(a) })
}
/// Loads the `f32` and splats it to all eight lanes of an `m256`.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn load_f32_splat_m256(a: &f32) -> m256 {
m256(unsafe { _mm256_broadcast_ss(a) })
}
/// Bit-preserving cast (no conversion) from `m256d` to `m256`.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn cast_to_m256_from_m256d(a: m256d) -> m256 {
m256(unsafe { _mm256_castpd_ps(a.0) })
}
/// Bit-preserving cast (no conversion) from `m256d` to `m256i`.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn cast_to_m256i_from_m256d(a: m256d) -> m256i {
m256i(unsafe { _mm256_castpd_si256(a.0) })
}
/// Bit-preserving cast (no conversion) from `m256` to `m256d`.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn cast_to_m256d_from_m256(a: m256) -> m256d {
m256d(unsafe { _mm256_castps_pd(a.0) })
}
/// Bit-preserving cast (no conversion) from `m256` to `m256i`.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn cast_to_m256i_from_m256(a: m256) -> m256i {
m256i(unsafe { _mm256_castps_si256(a.0) })
}
/// Bit-preserving cast (no conversion) from `m256i` to `m256d`.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn cast_to_m256d_from_m256i(a: m256i) -> m256d {
m256d(unsafe { _mm256_castsi256_pd(a.0) })
}
/// Bit-preserving cast (no conversion) from `m256i` to `m256`.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn cast_to_m256_from_m256i(a: m256i) -> m256 {
m256(unsafe { _mm256_castsi256_ps(a.0) })
}
/// Keeps the low 128 bits of the `m256` as an `m128` (no data movement).
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn cast_to_m128_from_m256(a: m256) -> m128 {
m128(unsafe { _mm256_castps256_ps128(a.0) })
}
/// Keeps the low 128 bits of the `m256d` as an `m128d` (no data movement).
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn cast_to_m128d_from_m256d(a: m256d) -> m128d {
m128d(unsafe { _mm256_castpd256_pd128(a.0) })
}
/// Keeps the low 128 bits of the `m256i` as an `m128i` (no data movement).
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn cast_to_m128i_from_m256i(a: m256i) -> m128i {
m128i(unsafe { _mm256_castsi256_si128(a.0) })
}
/// Rounds each `f64` lane toward positive infinity.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn ceil_m256d(a: m256d) -> m256d {
m256d(unsafe { _mm256_ceil_pd(a.0) })
}
/// Rounds each `f32` lane toward positive infinity.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn ceil_m256(a: m256) -> m256 {
m256(unsafe { _mm256_ceil_ps(a.0) })
}
/// Turns a comparison-operator name into the matching `_CMP_*` immediate
/// constant, for use with the `cmp_op_mask_*` functions.
///
/// Each arm locally imports the constant from the correct `core::arch`
/// module for the target (32-bit `x86` vs `x86_64`), then evaluates to it.
/// An unknown operator name is a compile error.
#[macro_export]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
macro_rules! cmp_op {
(EqualOrdered) => {{
#[cfg(target_arch = "x86")]
use ::core::arch::x86::_CMP_EQ_OQ;
#[cfg(target_arch = "x86_64")]
use ::core::arch::x86_64::_CMP_EQ_OQ;
_CMP_EQ_OQ
}};
(EqualUnordered) => {{
#[cfg(target_arch = "x86")]
use ::core::arch::x86::_CMP_EQ_UQ;
#[cfg(target_arch = "x86_64")]
use ::core::arch::x86_64::_CMP_EQ_UQ;
_CMP_EQ_UQ
}};
(False) => {{
#[cfg(target_arch = "x86")]
use ::core::arch::x86::_CMP_FALSE_OQ;
#[cfg(target_arch = "x86_64")]
use ::core::arch::x86_64::_CMP_FALSE_OQ;
_CMP_FALSE_OQ
}};
(GreaterEqualOrdered) => {{
#[cfg(target_arch = "x86")]
use ::core::arch::x86::_CMP_GE_OQ;
#[cfg(target_arch = "x86_64")]
use ::core::arch::x86_64::_CMP_GE_OQ;
_CMP_GE_OQ
}};
(GreaterThanOrdered) => {{
#[cfg(target_arch = "x86")]
use ::core::arch::x86::_CMP_GT_OQ;
#[cfg(target_arch = "x86_64")]
use ::core::arch::x86_64::_CMP_GT_OQ;
_CMP_GT_OQ
}};
(LessEqualOrdered) => {{
#[cfg(target_arch = "x86")]
use ::core::arch::x86::_CMP_LE_OQ;
#[cfg(target_arch = "x86_64")]
use ::core::arch::x86_64::_CMP_LE_OQ;
_CMP_LE_OQ
}};
(LessThanOrdered) => {{
#[cfg(target_arch = "x86")]
use ::core::arch::x86::_CMP_LT_OQ;
#[cfg(target_arch = "x86_64")]
use ::core::arch::x86_64::_CMP_LT_OQ;
_CMP_LT_OQ
}};
(NotEqualOrdered) => {{
#[cfg(target_arch = "x86")]
use ::core::arch::x86::_CMP_NEQ_OQ;
#[cfg(target_arch = "x86_64")]
use ::core::arch::x86_64::_CMP_NEQ_OQ;
_CMP_NEQ_OQ
}};
(NotEqualUnordered) => {{
#[cfg(target_arch = "x86")]
use ::core::arch::x86::_CMP_NEQ_UQ;
#[cfg(target_arch = "x86_64")]
use ::core::arch::x86_64::_CMP_NEQ_UQ;
_CMP_NEQ_UQ
}};
(NotGreaterEqualUnordered) => {{
#[cfg(target_arch = "x86")]
use ::core::arch::x86::_CMP_NGE_UQ;
#[cfg(target_arch = "x86_64")]
use ::core::arch::x86_64::_CMP_NGE_UQ;
_CMP_NGE_UQ
}};
(NotGreaterThanUnordered) => {{
#[cfg(target_arch = "x86")]
use ::core::arch::x86::_CMP_NGT_UQ;
#[cfg(target_arch = "x86_64")]
use ::core::arch::x86_64::_CMP_NGT_UQ;
_CMP_NGT_UQ
}};
(NotLessEqualUnordered) => {{
#[cfg(target_arch = "x86")]
use ::core::arch::x86::_CMP_NLE_UQ;
#[cfg(target_arch = "x86_64")]
use ::core::arch::x86_64::_CMP_NLE_UQ;
_CMP_NLE_UQ
}};
(NotLessThanUnordered) => {{
#[cfg(target_arch = "x86")]
use ::core::arch::x86::_CMP_NLT_UQ;
#[cfg(target_arch = "x86_64")]
use ::core::arch::x86_64::_CMP_NLT_UQ;
_CMP_NLT_UQ
}};
(Ordered) => {{
#[cfg(target_arch = "x86")]
use ::core::arch::x86::_CMP_ORD_Q;
#[cfg(target_arch = "x86_64")]
use ::core::arch::x86_64::_CMP_ORD_Q;
_CMP_ORD_Q
}};
(True) => {{
#[cfg(target_arch = "x86")]
use ::core::arch::x86::_CMP_TRUE_UQ;
#[cfg(target_arch = "x86_64")]
use ::core::arch::x86_64::_CMP_TRUE_UQ;
_CMP_TRUE_UQ
}};
(Unordered) => {{
#[cfg(target_arch = "x86")]
use ::core::arch::x86::_CMP_UNORD_Q;
#[cfg(target_arch = "x86_64")]
use ::core::arch::x86_64::_CMP_UNORD_Q;
_CMP_UNORD_Q
}};
($unknown_op:tt) => {{
compile_error!("The operation name given is invalid.");
}};
}
/// Compares `f32` lanes by `OP` (use the `cmp_op!` macro), producing an all-1s/all-0s lane mask.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn cmp_op_mask_m128<const OP: i32>(a: m128, b: m128) -> m128 {
m128(unsafe { _mm_cmp_ps(a.0, b.0, OP) })
}
/// Compares the low `f32` lane by `OP`; the upper lanes are copied from `a`.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn cmp_op_mask_m128_s<const OP: i32>(a: m128, b: m128) -> m128 {
m128(unsafe { _mm_cmp_ss(a.0, b.0, OP) })
}
/// Compares `f32` lanes by `OP` (use the `cmp_op!` macro), producing an all-1s/all-0s lane mask.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn cmp_op_mask_m256<const OP: i32>(a: m256, b: m256) -> m256 {
m256(unsafe { _mm256_cmp_ps(a.0, b.0, OP) })
}
/// Compares `f64` lanes by `OP` (use the `cmp_op!` macro), producing an all-1s/all-0s lane mask.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn cmp_op_mask_m128d<const OP: i32>(a: m128d, b: m128d) -> m128d {
m128d(unsafe { _mm_cmp_pd(a.0, b.0, OP) })
}
/// Compares the low `f64` lane by `OP`; the upper lane is copied from `a`.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn cmp_op_mask_m128d_s<const OP: i32>(a: m128d, b: m128d) -> m128d {
m128d(unsafe { _mm_cmp_sd(a.0, b.0, OP) })
}
/// Compares `f64` lanes by `OP` (use the `cmp_op!` macro), producing an all-1s/all-0s lane mask.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn cmp_op_mask_m256d<const OP: i32>(a: m256d, b: m256d) -> m256d {
m256d(unsafe { _mm256_cmp_pd(a.0, b.0, OP) })
}
/// Converts the four `i32` lanes to four `f64` lanes.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn convert_to_m256d_from_i32_m128i(a: m128i) -> m256d {
m256d(unsafe { _mm256_cvtepi32_pd(a.0) })
}
/// Converts the eight `i32` lanes to eight `f32` lanes.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn convert_to_m256_from_i32_m256i(a: m256i) -> m256 {
m256(unsafe { _mm256_cvtepi32_ps(a.0) })
}
/// Converts the four `f64` lanes to four `i32` lanes (rounded).
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn convert_to_i32_m128i_from_m256d(a: m256d) -> m128i {
m128i(unsafe { _mm256_cvtpd_epi32(a.0) })
}
/// Converts the four `f64` lanes to four `f32` lanes.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn convert_to_m128_from_m256d(a: m256d) -> m128 {
m128(unsafe { _mm256_cvtpd_ps(a.0) })
}
/// Converts the eight `f32` lanes to eight `i32` lanes (rounded).
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn convert_to_i32_m256i_from_m256(a: m256) -> m256i {
m256i(unsafe { _mm256_cvtps_epi32(a.0) })
}
/// Converts the four `f32` lanes to four `f64` lanes.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn convert_to_m256d_from_m128(a: m128) -> m256d {
m256d(unsafe { _mm256_cvtps_pd(a.0) })
}
/// Extracts the lowest `f64` lane as a scalar.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn convert_to_f64_from_m256d_s(a: m256d) -> f64 {
unsafe { _mm256_cvtsd_f64(a.0) }
}
/// Extracts the lowest `i32` lane as a scalar.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn convert_to_i32_from_m256i_s(a: m256i) -> i32 {
unsafe { _mm256_cvtsi256_si32(a.0) }
}
/// Extracts the lowest `f32` lane as a scalar.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn convert_to_f32_from_m256_s(a: m256) -> f32 {
unsafe { _mm256_cvtss_f32(a.0) }
}
/// Converts the four `f64` lanes to four `i32` lanes, truncating toward zero.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn convert_truncate_to_i32_m128i_from_m256d(a: m256d) -> m128i {
m128i(unsafe { _mm256_cvttpd_epi32(a.0) })
}
/// Converts the eight `f32` lanes to eight `i32` lanes, truncating toward zero.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn convert_truncate_to_i32_m256i_from_m256(a: m256) -> m256i {
m256i(unsafe { _mm256_cvttps_epi32(a.0) })
}
/// Lanewise `a / b` with `f64` lanes.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn div_m256d(a: m256d, b: m256d) -> m256d {
m256d(unsafe { _mm256_div_pd(a.0, b.0) })
}
/// Lanewise `a / b` with `f32` lanes.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn div_m256(a: m256, b: m256) -> m256 {
m256(unsafe { _mm256_div_ps(a.0, b.0) })
}
/// Conditional dot product of `f32` lanes within each 128-bit half, controlled by `IMM`
/// (high nibble selects the multiplied lanes, low nibble where the sum is broadcast).
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn dot_product_m256<const IMM: i32>(a: m256, b: m256) -> m256 {
m256(unsafe { _mm256_dp_ps(a.0, b.0, IMM) })
}
/// Extracts the `i32` lane at index `IMM` (0..=7).
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn extract_i32_from_m256i<const IMM: i32>(a: m256i) -> i32 {
unsafe { _mm256_extract_epi32(a.0, IMM) }
}
/// Extracts the `i64` lane at index `IMM` (0..=3). 64-bit targets only.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
#[cfg(target_arch = "x86_64")]
pub fn extract_i64_from_m256i<const IMM: i32>(a: m256i) -> i64 {
unsafe { _mm256_extract_epi64(a.0, IMM) }
}
/// Extracts the 128-bit half selected by `IMM` (0 = low, 1 = high) as an `m128d`.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn extract_m128d_from_m256d<const IMM: i32>(a: m256d) -> m128d {
m128d(unsafe { _mm256_extractf128_pd(a.0, IMM) })
}
/// Extracts the 128-bit half selected by `IMM` (0 = low, 1 = high) as an `m128`.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn extract_m128_from_m256<const IMM: i32>(a: m256) -> m128 {
m128(unsafe { _mm256_extractf128_ps(a.0, IMM) })
}
/// Extracts the 128-bit half selected by `IMM` (0 = low, 1 = high) as an `m128i`.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn extract_m128i_from_m256i<const IMM: i32>(a: m256i) -> m128i {
m128i(unsafe { _mm256_extractf128_si256(a.0, IMM) })
}
/// Rounds each `f64` lane toward negative infinity.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn floor_m256d(a: m256d) -> m256d {
m256d(unsafe { _mm256_floor_pd(a.0) })
}
/// Rounds each `f32` lane toward negative infinity.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn floor_m256(a: m256) -> m256 {
m256(unsafe { _mm256_floor_ps(a.0) })
}
/// Horizontal add of adjacent `f64` lane pairs from `a` and `b`, interleaved by 128-bit half.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn add_horizontal_m256d(a: m256d, b: m256d) -> m256d {
m256d(unsafe { _mm256_hadd_pd(a.0, b.0) })
}
/// Horizontal add of adjacent `f32` lane pairs from `a` and `b`, interleaved by 128-bit half.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn add_horizontal_m256(a: m256, b: m256) -> m256 {
m256(unsafe { _mm256_hadd_ps(a.0, b.0) })
}
/// Horizontal subtract of adjacent `f64` lane pairs from `a` and `b`, interleaved by 128-bit half.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn sub_horizontal_m256d(a: m256d, b: m256d) -> m256d {
m256d(unsafe { _mm256_hsub_pd(a.0, b.0) })
}
/// Horizontal subtract of adjacent `f32` lane pairs from `a` and `b`, interleaved by 128-bit half.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn sub_horizontal_m256(a: m256, b: m256) -> m256 {
m256(unsafe { _mm256_hsub_ps(a.0, b.0) })
}
/// Returns `a` with the `i8` lane at index `IMM` (0..=31) replaced by `i`.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn insert_i8_to_m256i<const IMM: i32>(a: m256i, i: i8) -> m256i {
m256i(unsafe { _mm256_insert_epi8(a.0, i, IMM) })
}
/// Returns `a` with the `i16` lane at index `IMM` (0..=15) replaced by `i`.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn insert_i16_to_m256i<const IMM: i32>(a: m256i, i: i16) -> m256i {
m256i(unsafe { _mm256_insert_epi16(a.0, i, IMM) })
}
/// Returns `a` with the `i32` lane at index `IMM` (0..=7) replaced by `i`.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn insert_i32_to_m256i<const IMM: i32>(a: m256i, i: i32) -> m256i {
m256i(unsafe { _mm256_insert_epi32(a.0, i, IMM) })
}
/// Returns `a` with the `i64` lane at index `IMM` (0..=3) replaced by `i`. 64-bit targets only.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
#[cfg(target_arch = "x86_64")]
pub fn insert_i64_to_m256i<const IMM: i32>(a: m256i, i: i64) -> m256i {
m256i(unsafe { _mm256_insert_epi64(a.0, i, IMM) })
}
/// Returns `a` with the 128-bit half selected by `IMM` (0 = low, 1 = high) replaced by `b`.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn insert_m128d_to_m256d<const IMM: i32>(a: m256d, b: m128d) -> m256d {
m256d(unsafe { _mm256_insertf128_pd(a.0, b.0, IMM) })
}
/// Returns `a` with the 128-bit half selected by `IMM` (0 = low, 1 = high) replaced by `b`.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn insert_m128_to_m256<const IMM: i32>(a: m256, b: m128) -> m256 {
m256(unsafe { _mm256_insertf128_ps(a.0, b.0, IMM) })
}
/// Returns `a` with the 128-bit half selected by `IMM` replaced by `b`.
/// "slow" because the AVX1 float-domain insert is used on integer data.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn insert_m128i_to_m256i_slow_avx<const IMM: i32>(a: m256i, b: m128i) -> m256i {
m256i(unsafe { _mm256_insertf128_si256(a.0, b.0, IMM) })
}
/// Loads an `m256d` from an aligned reference.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn load_m256d(a: &m256d) -> m256d {
m256d(unsafe { _mm256_load_pd(a as *const m256d as *const f64) })
}
/// Loads an `m256` from an aligned reference.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn load_m256(a: &m256) -> m256 {
m256(unsafe { _mm256_load_ps(a as *const m256 as *const f32) })
}
/// Loads an `m256i` from an aligned reference.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn load_m256i(a: &m256i) -> m256i {
m256i(unsafe { _mm256_load_si256(a as *const m256i as *const __m256i) })
}
/// Loads four `f64` from an array; no alignment requirement.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn load_unaligned_m256d(a: &[f64; 4]) -> m256d {
m256d(unsafe { _mm256_loadu_pd(a as *const [f64; 4] as *const f64) })
}
/// Loads eight `f32` from an array; no alignment requirement.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn load_unaligned_m256(a: &[f32; 8]) -> m256 {
m256(unsafe { _mm256_loadu_ps(a as *const [f32; 8] as *const f32) })
}
/// Loads 32 bytes from an array; no alignment requirement.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn load_unaligned_m256i(a: &[i8; 32]) -> m256i {
m256i(unsafe { _mm256_loadu_si256(a as *const [i8; 32] as *const __m256i) })
}
/// Loads `a` into the high half and `b` into the low half; no alignment requirement.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn load_unaligned_hi_lo_m256d(a: &[f64; 2], b: &[f64; 2]) -> m256d {
m256d(unsafe { _mm256_loadu2_m128d(a as *const [f64; 2] as *const f64, b as *const [f64; 2] as *const f64) })
}
/// Loads `a` into the high half and `b` into the low half; no alignment requirement.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn load_unaligned_hi_lo_m256(a: &[f32; 4], b: &[f32; 4]) -> m256 {
m256(unsafe { _mm256_loadu2_m128(a as *const [f32; 4] as *const f32, b as *const [f32; 4] as *const f32) })
}
/// Loads `a` into the high half and `b` into the low half; no alignment requirement.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn load_unaligned_hi_lo_m256i(a: &[i8; 16], b: &[i8; 16]) -> m256i {
m256i(unsafe { _mm256_loadu2_m128i(a as *const [i8; 16] as *const __m128i, b as *const [i8; 16] as *const __m128i) })
}
/// Loads the `f64` lanes whose mask lane has a set sign bit; other lanes become 0.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn load_masked_m128d(a: &m128d, mask: m128i) -> m128d {
m128d(unsafe { _mm_maskload_pd(a as *const m128d as *const f64, mask.0) })
}
/// Loads the `f64` lanes whose mask lane has a set sign bit; other lanes become 0.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn load_masked_m256d(a: &m256d, mask: m256i) -> m256d {
m256d(unsafe { _mm256_maskload_pd(a as *const m256d as *const f64, mask.0) })
}
/// Loads the `f32` lanes whose mask lane has a set sign bit; other lanes become 0.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn load_masked_m128(a: &m128, mask: m128i) -> m128 {
m128(unsafe { _mm_maskload_ps(a as *const m128 as *const f32, mask.0) })
}
/// Loads the `f32` lanes whose mask lane has a set sign bit; other lanes become 0.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn load_masked_m256(a: &m256, mask: m256i) -> m256 {
m256(unsafe { _mm256_maskload_ps(a as *const m256 as *const f32, mask.0) })
}
/// Stores the `f64` lanes of `a` whose mask lane has a set sign bit; other memory lanes are untouched.
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn store_masked_m128d(addr: &mut m128d, mask: m128i, a: m128d) {
unsafe { _mm_maskstore_pd(addr as *mut m128d as *mut f64, mask.0, a.0) }
}
/// Stores the `f64` lanes of `a` whose mask lane has a set sign bit; other memory lanes are untouched.
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn store_masked_m256d(addr: &mut m256d, mask: m256i, a: m256d) {
unsafe { _mm256_maskstore_pd(addr as *mut m256d as *mut f64, mask.0, a.0) }
}
/// Stores the `f32` lanes of `a` whose mask lane has a set sign bit; other memory lanes are untouched.
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn store_masked_m128(addr: &mut m128, mask: m128i, a: m128) {
unsafe { _mm_maskstore_ps(addr as *mut m128 as *mut f32, mask.0, a.0) }
}
/// Stores the `f32` lanes of `a` whose mask lane has a set sign bit; other memory lanes are untouched.
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn store_masked_m256(addr: &mut m256, mask: m256i, a: m256) {
unsafe { _mm256_maskstore_ps(addr as *mut m256 as *mut f32, mask.0, a.0) }
}
/// Lanewise maximum of `f64` lanes.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn max_m256d(a: m256d, b: m256d) -> m256d {
m256d(unsafe { _mm256_max_pd(a.0, b.0) })
}
/// Lanewise maximum of `f32` lanes.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn max_m256(a: m256, b: m256) -> m256 {
m256(unsafe { _mm256_max_ps(a.0, b.0) })
}
/// Lanewise minimum of `f64` lanes.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn min_m256d(a: m256d, b: m256d) -> m256d {
m256d(unsafe { _mm256_min_pd(a.0, b.0) })
}
/// Lanewise minimum of `f32` lanes.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn min_m256(a: m256, b: m256) -> m256 {
m256(unsafe { _mm256_min_ps(a.0, b.0) })
}
/// Duplicates `f64` lanes within each pair: output is `[a[0], a[0], a[2], a[2]]`.
///
/// NOTE(review): `_mm256_movedup_pd` duplicates the EVEN-indexed (low) lanes,
/// so this function's name ("odd") appears to contradict its behavior — the
/// natural name would be `duplicate_even_lanes_m256d`. Renaming would break
/// callers, so only flagging it here; confirm intent before changing the API.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn duplicate_odd_lanes_m256d(a: m256d) -> m256d {
m256d(unsafe { _mm256_movedup_pd(a.0) })
}
/// Duplicates the even-indexed `f32` lanes into the adjacent odd lanes.
///
/// Output: `[a[0], a[0], a[2], a[2], a[4], a[4], a[6], a[6]]`.
// Fix: this previously called `_mm256_movehdup_ps`, which duplicates the
// ODD-indexed lanes — the intrinsic was swapped with `duplicate_odd_lanes_m256`.
// `_mm256_moveldup_ps` is the "duplicate even/low lanes" operation.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn duplicate_even_lanes_m256(a: m256) -> m256 {
m256(unsafe { _mm256_moveldup_ps(a.0) })
}
/// Duplicates the odd-indexed `f32` lanes into the adjacent even lanes.
///
/// Output: `[a[1], a[1], a[3], a[3], a[5], a[5], a[7], a[7]]`.
// Fix: this previously called `_mm256_moveldup_ps`, which duplicates the
// EVEN-indexed lanes — the intrinsic was swapped with `duplicate_even_lanes_m256`.
// `_mm256_movehdup_ps` is the "duplicate odd/high lanes" operation.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn duplicate_odd_lanes_m256(a: m256) -> m256 {
m256(unsafe { _mm256_movehdup_ps(a.0) })
}
/// Collects the sign bit of each `f64` lane into the low 4 bits of an `i32`.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn move_mask_m256d(a: m256d) -> i32 {
unsafe { _mm256_movemask_pd(a.0) }
}
/// Returns 1 if all sign bits of `a & b` are zero, otherwise 0.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn testz_m256(a: m256, b: m256) -> i32 {
unsafe { _mm256_testz_ps(a.0, b.0) }
}
/// Returns 1 if all sign bits of `a & b` are zero, otherwise 0.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn testz_m128(a: m128, b: m128) -> i32 {
unsafe { _mm_testz_ps(a.0, b.0) }
}
/// Returns 1 if all sign bits of `(!a) & b` are zero, otherwise 0.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn testc_m256(a: m256, b: m256) -> i32 {
unsafe { _mm256_testc_ps(a.0, b.0) }
}
/// Returns 1 if all sign bits of `(!a) & b` are zero, otherwise 0.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn testc_m128(a: m128, b: m128) -> i32 {
unsafe { _mm_testc_ps(a.0, b.0) }
}
/// Returns 1 if all sign bits of `a & b` are zero, otherwise 0.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn testz_m256d(a: m256d, b: m256d) -> i32 {
unsafe { _mm256_testz_pd(a.0, b.0) }
}
/// Returns 1 if all sign bits of `a & b` are zero, otherwise 0.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn testz_m128d(a: m128d, b: m128d) -> i32 {
unsafe { _mm_testz_pd(a.0, b.0) }
}
/// Returns 1 if all sign bits of `(!a) & b` are zero, otherwise 0.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn testc_m256d(a: m256d, b: m256d) -> i32 {
unsafe { _mm256_testc_pd(a.0, b.0) }
}
/// Returns 1 if all sign bits of `(!a) & b` are zero, otherwise 0.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn testc_m128d(a: m128d, b: m128d) -> i32 {
unsafe { _mm_testc_pd(a.0, b.0) }
}
/// Returns 1 if every bit of `a & b` is zero, otherwise 0.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn testz_m256i(a: m256i, b: m256i) -> i32 {
unsafe { _mm256_testz_si256(a.0, b.0) }
}
/// Returns 1 if every bit of `(!a) & b` is zero, otherwise 0.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn testc_m256i(a: m256i, b: m256i) -> i32 {
unsafe { _mm256_testc_si256(a.0, b.0) }
}
/// Collects the sign bit of each `f32` lane into the low 8 bits of an `i32`.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn move_mask_m256(a: m256) -> i32 {
unsafe { _mm256_movemask_ps(a.0) }
}
/// Lanewise `a * b` with `f64` lanes.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn mul_m256d(a: m256d, b: m256d) -> m256d {
m256d(unsafe { _mm256_mul_pd(a.0, b.0) })
}
/// Lanewise `a * b` with `f32` lanes.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn mul_m256(a: m256, b: m256) -> m256 {
m256(unsafe { _mm256_mul_ps(a.0, b.0) })
}
/// Bitwise `a | b` on the bits of the `f64` lanes.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn bitor_m256d(a: m256d, b: m256d) -> m256d {
m256d(unsafe { _mm256_or_pd(a.0, b.0) })
}
/// Bitwise `a | b` on the bits of the `f32` lanes.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn bitor_m256(a: m256, b: m256) -> m256 {
m256(unsafe { _mm256_or_ps(a.0, b.0) })
}
/// Shuffles the `f64` lanes of `a` by the immediate: bit `i` of `MASK` picks lane 0 or 1 for output lane `i`.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn permute_m128d<const MASK: i32>(a: m128d) -> m128d {
m128d(unsafe { _mm_permute_pd(a.0, MASK) })
}
/// Shuffles `f64` lanes by the immediate, independently within each 128-bit half.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn permute_m256d<const MASK: i32>(a: m256d) -> m256d {
m256d(unsafe { _mm256_permute_pd(a.0, MASK) })
}
/// Shuffles the `f32` lanes of `a` by the immediate (two bits per output lane).
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn permute_m128<const MASK: i32>(a: m128) -> m128 {
m128(unsafe { _mm_permute_ps(a.0, MASK) })
}
/// Shuffles `f32` lanes by the immediate, the same pattern applied to each 128-bit half.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn permute_m256<const MASK: i32>(a: m256) -> m256 {
m256(unsafe { _mm256_permute_ps(a.0, MASK) })
}
/// Selects 128-bit halves from `a`/`b` per the immediate; a set high bit in a nibble zeroes that half.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn permute2z_m256d<const MASK: i32>(a: m256d, b: m256d) -> m256d {
m256d(unsafe { _mm256_permute2f128_pd(a.0, b.0, MASK) })
}
/// Selects 128-bit halves from `a`/`b` per the immediate; a set high bit in a nibble zeroes that half.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn permute2z_m256<const MASK: i32>(a: m256, b: m256) -> m256 {
m256(unsafe { _mm256_permute2f128_ps(a.0, b.0, MASK) })
}
/// Selects 128-bit halves from `a`/`b` per the immediate; a set high bit in a nibble zeroes that half.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn permute2z_m256i<const MASK: i32>(a: m256i, b: m256i) -> m256i {
m256i(unsafe { _mm256_permute2f128_si256(a.0, b.0, MASK) })
}
/// Shuffles `f64` lanes of `a` using bit 1 of each `i64` lane of `v` as the selector.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn shuffle_av_f64_all_m128d(a: m128d, v: m128i) -> m128d {
m128d(unsafe { _mm_permutevar_pd(a.0, v.0) })
}
/// Shuffles `f64` lanes using bit 1 of each `i64` lane of `b`, within each 128-bit half.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn shuffle_av_f64_half_m256d(a: m256d, b: m256i) -> m256d {
m256d(unsafe { _mm256_permutevar_pd(a.0, b.0) })
}
/// Shuffles `f32` lanes of `a` using the low 2 bits of each `i32` lane of `v`.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn shuffle_av_f32_all_m128(a: m128, v: m128i) -> m128 {
m128(unsafe { _mm_permutevar_ps(a.0, v.0) })
}
/// Shuffles `f32` lanes using the low 2 bits of each `i32` lane of `v`, within each 128-bit half.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn shuffle_av_f32_half_m256(a: m256, v: m256i) -> m256 {
m256(unsafe { _mm256_permutevar_ps(a.0, v.0) })
}
/// Approximate lanewise reciprocal of `f32` lanes (low-precision estimate).
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn reciprocal_m256(a: m256) -> m256 {
m256(unsafe { _mm256_rcp_ps(a.0) })
}
/// Rounds each `f64` lane according to the `OP` rounding-control immediate.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn round_m256d<const OP: i32>(a: m256d) -> m256d {
m256d(unsafe { _mm256_round_pd(a.0, OP) })
}
/// Rounds each `f32` lane according to the `OP` rounding-control immediate.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn round_m256<const OP: i32>(a: m256) -> m256 {
m256(unsafe { _mm256_round_ps(a.0, OP) })
}
/// Approximate lanewise reciprocal square root of `f32` lanes (low-precision estimate).
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn reciprocal_sqrt_m256(a: m256) -> m256 {
m256(unsafe { _mm256_rsqrt_ps(a.0) })
}
/// Sets the `i8` lanes of an `m256i`, arguments from the highest lane (`e31`) to the lowest (`e0`).
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
#[rustfmt::skip]
pub fn set_i8_m256i(
e31: i8, e30: i8, e29: i8, e28: i8, e27: i8, e26: i8, e25: i8, e24: i8, e23: i8, e22: i8, e21: i8, e20: i8, e19: i8, e18: i8, e17: i8, e16: i8, e15: i8, e14: i8, e13: i8, e12: i8, e11: i8, e10: i8, e9: i8, e8: i8, e7: i8, e6: i8, e5: i8, e4: i8, e3: i8, e2: i8, e1: i8, e0: i8
) -> m256i {
m256i(unsafe {
_mm256_set_epi8(
e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0
)
})
}
/// Sets the `i16` lanes of an `m256i`, arguments from the highest lane (`e15`) to the lowest (`e0`).
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
#[rustfmt::skip]
pub fn set_i16_m256i(
e15: i16, e14: i16, e13: i16, e12: i16, e11: i16, e10: i16, e9: i16, e8: i16,
e7: i16, e6: i16, e5: i16, e4: i16, e3: i16, e2: i16, e1: i16, e0: i16,
) -> m256i {
m256i(unsafe {
_mm256_set_epi16(
e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0,
)
})
}
/// Sets the `i32` lanes of an `m256i`, arguments from the highest lane (`e7`) to the lowest (`e0`).
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
#[rustfmt::skip]
pub fn set_i32_m256i(
e7: i32, e6: i32, e5: i32, e4: i32, e3: i32, e2: i32, e1: i32, e0: i32,
) -> m256i {
m256i(unsafe {
_mm256_set_epi32(e7, e6, e5, e4, e3, e2, e1, e0)
})
}
/// Sets the `i64` lanes of an `m256i`, arguments from the highest lane (`e3`)
/// to the lowest (`e0`). 64-bit targets only.
// Fix: the `#[cfg(target_arch = "x86_64")]` attribute was duplicated
// (it appeared both before and after the docs.rs cfg_attr) — deduplicated,
// matching the attribute order used by `set_splat_i64_m256i`.
#[must_use]
#[inline(always)]
#[cfg(target_arch = "x86_64")]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn set_i64_m256i(e3: i64, e2: i64, e1: i64, e0: i64) -> m256i {
m256i(unsafe { _mm256_set_epi64x(e3, e2, e1, e0) })
}
/// Builds an `m256` from two `m128` values: `high` in the upper half, `low` in the lower half.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn set_m128_m256(high: m128, low: m128) -> m256 {
m256(unsafe { _mm256_set_m128(high.0, low.0) })
}
/// Builds an `m256d` from two `m128d` values: `high` in the upper half, `low` in the lower half.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
#[rustfmt::skip]
pub fn set_m128d_m256d(
high: m128d, low: m128d
) -> m256d {
m256d(unsafe { _mm256_set_m128d(high.0, low.0) })
}
/// Builds an `m256i` from two `m128i` values: `hi` in the upper half, `lo` in the lower half.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
#[rustfmt::skip]
pub fn set_m128i_m256i(
hi: m128i, lo: m128i
) -> m256i {
m256i(unsafe { _mm256_set_m128i(hi.0, lo.0) })
}
/// Sets the `f64` lanes, arguments from the highest lane (`e3`) to the lowest (`e0`).
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
#[rustfmt::skip]
pub fn set_m256d(
e3: f64, e2: f64, e1: f64, e0: f64,
) -> m256d {
m256d(unsafe { _mm256_set_pd(e3, e2, e1, e0) })
}
/// Sets the `f32` lanes, arguments from the highest lane (`e7`) to the lowest (`e0`).
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
#[rustfmt::skip]
pub fn set_m256(
e7: f32, e6: f32, e5: f32, e4: f32, e3: f32, e2: f32, e1: f32, e0: f32,
) -> m256 {
m256(unsafe {
_mm256_set_ps(e7, e6, e5, e4, e3, e2, e1, e0)
})
}
/// Splats the `i8` to all 32 lanes of an `m256i`.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn set_splat_i8_m256i(i: i8) -> m256i {
m256i(unsafe { _mm256_set1_epi8(i) })
}
/// Splats the `i16` to all 16 lanes of an `m256i`.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn set_splat_i16_m256i(i: i16) -> m256i {
m256i(unsafe { _mm256_set1_epi16(i) })
}
/// Splats the `i32` to all 8 lanes of an `m256i`.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn set_splat_i32_m256i(i: i32) -> m256i {
m256i(unsafe { _mm256_set1_epi32(i) })
}
/// Splats the `i64` to all 4 lanes of an `m256i`. 64-bit targets only.
#[must_use]
#[inline(always)]
#[cfg(target_arch = "x86_64")]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn set_splat_i64_m256i(i: i64) -> m256i {
m256i(unsafe { _mm256_set1_epi64x(i) })
}
/// Splats the `f64` to all 4 lanes of an `m256d`.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn set_splat_m256d(f: f64) -> m256d {
m256d(unsafe { _mm256_set1_pd(f) })
}
/// Splats the `f32` to all 8 lanes of an `m256`.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
#[rustfmt::skip]
pub fn set_splat_m256(
f: f32,
) -> m256 {
m256(unsafe {
_mm256_set1_ps(f)
})
}
/// Sets the `i8` lanes in reversed (lowest-first) argument order.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
#[rustfmt::skip]
pub fn set_reversed_i8_m256i(
e31: i8, e30: i8, e29: i8, e28: i8, e27: i8, e26: i8, e25: i8, e24: i8, e23: i8, e22: i8, e21: i8, e20: i8, e19: i8, e18: i8, e17: i8, e16: i8, e15: i8, e14: i8, e13: i8, e12: i8, e11: i8, e10: i8, e9: i8, e8: i8, e7: i8, e6: i8, e5: i8, e4: i8, e3: i8, e2: i8, e1: i8, e0: i8
) -> m256i {
m256i(unsafe {
_mm256_setr_epi8(
e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0
)
})
}
/// Sets the `i16` lanes in reversed (lowest-first) argument order.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
#[rustfmt::skip]
pub fn set_reversed_i16_m256i(
e15: i16, e14: i16, e13: i16, e12: i16, e11: i16, e10: i16, e9: i16, e8: i16,
e7: i16, e6: i16, e5: i16, e4: i16, e3: i16, e2: i16, e1: i16, e0: i16,
) -> m256i {
m256i(unsafe {
_mm256_setr_epi16(
e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0,
)
})
}
/// Sets the `i32` lanes in reversed (lowest-first) argument order.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
#[rustfmt::skip]
pub fn set_reversed_i32_m256i(
e7: i32, e6: i32, e5: i32, e4: i32, e3: i32, e2: i32, e1: i32, e0: i32,
) -> m256i {
m256i(unsafe {
_mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0)
})
}
/// Sets the `i64` lanes in reversed (lowest-first) argument order. 64-bit targets only.
#[must_use]
#[inline(always)]
#[cfg(target_arch = "x86_64")]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn set_reversed_i64_m256i(e3: i64, e2: i64, e1: i64, e0: i64) -> m256i {
m256i(unsafe { _mm256_setr_epi64x(e3, e2, e1, e0) })
}
/// Builds an `m256` from two `m128` values in reversed order: `hi` goes to the LOW half.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn set_reversed_m128_m256(hi: m128, lo: m128) -> m256 {
m256(unsafe { _mm256_setr_m128(hi.0, lo.0) })
}
/// Builds an `m256d` from two `m128d` values in reversed order: `hi` goes to the LOW half.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
#[rustfmt::skip]
pub fn set_reversed_m128d_m256d(
hi: m128d, lo: m128d
) -> m256d {
m256d(unsafe { _mm256_setr_m128d(hi.0, lo.0) })
}
/// Builds an `m256i` from two `m128i` values in reversed order: `hi` goes to the LOW half.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
#[rustfmt::skip]
pub fn set_reversed_m128i_m256i(
hi: m128i, lo: m128i
) -> m256i {
m256i(unsafe { _mm256_setr_m128i(hi.0, lo.0) })
}
/// Sets the `f64` lanes in reversed (lowest-first) argument order.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
#[rustfmt::skip]
pub fn set_reversed_m256d(
e3: f64, e2: f64, e1: f64, e0: f64,
) -> m256d {
m256d(unsafe { _mm256_setr_pd(e3, e2, e1, e0) })
}
/// Sets the `f32` lanes in reversed (lowest-first) argument order.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
#[rustfmt::skip]
pub fn set_reversed_m256(
e7: f32, e6: f32, e5: f32, e4: f32, e3: f32, e2: f32, e1: f32, e0: f32,
) -> m256 {
m256(unsafe {
_mm256_setr_ps(e7, e6, e5, e4, e3, e2, e1, e0)
})
}
/// An `m256d` with all four `f64` lanes set to zero.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn zeroed_m256d() -> m256d {
  // SAFETY: this module is only compiled when AVX is enabled (crate-level cfg).
  let zero = unsafe { _mm256_setzero_pd() };
  m256d(zero)
}
/// An `m256` with all eight `f32` lanes set to zero.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn zeroed_m256() -> m256 {
  // SAFETY: this module is only compiled when AVX is enabled (crate-level cfg).
  let zero = unsafe { _mm256_setzero_ps() };
  m256(zero)
}
/// An `m256i` with all 256 bits cleared.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn zeroed_m256i() -> m256i {
  // SAFETY: this module is only compiled when AVX is enabled (crate-level cfg).
  let zero = unsafe { _mm256_setzero_si256() };
  m256i(zero)
}
/// Shuffles the `f64` lanes of `a` and `b` according to the immediate `IMM`.
///
/// `IMM` is forwarded verbatim as the intrinsic's control byte; see the
/// vendor documentation for the per-lane selection encoding.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn shuffle_m256d<const IMM: i32>(a: m256d, b: m256d) -> m256d {
m256d(unsafe { _mm256_shuffle_pd(a.0, b.0, IMM) })
}
/// Shuffles the `f32` lanes of `a` and `b` according to the immediate `IMM`.
///
/// `IMM` is forwarded verbatim as the intrinsic's control byte.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn shuffle_m256<const IMM: i32>(a: m256, b: m256) -> m256 {
m256(unsafe { _mm256_shuffle_ps(a.0, b.0, IMM) })
}
/// Lanewise `sqrt` of the four `f64` lanes.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn sqrt_m256d(a: m256d) -> m256d {
  // SAFETY: this module is only compiled when AVX is enabled (crate-level cfg).
  let roots = unsafe { _mm256_sqrt_pd(a.0) };
  m256d(roots)
}
/// Lanewise `sqrt` of the eight `f32` lanes.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn sqrt_m256(a: m256) -> m256 {
  // SAFETY: this module is only compiled when AVX is enabled (crate-level cfg).
  let roots = unsafe { _mm256_sqrt_ps(a.0) };
  m256(roots)
}
/// Stores `a` into the referenced location with an *aligned* 256-bit store.
///
/// The aligned-store intrinsic requires a 32-byte-aligned destination;
/// taking `&mut m256d` keeps this safe — presumably the wrapper type is
/// declared with 32-byte alignment (TODO confirm `repr(align(32))`).
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn store_m256d(addr: &mut m256d, a: m256d) {
unsafe { _mm256_store_pd(addr as *mut m256d as *mut f64, a.0) }
}
/// Stores `a` into the referenced `m256` (aligned 256-bit store; same
/// alignment reasoning as [`store_m256d`]).
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn store_m256(addr: &mut m256, a: m256) {
unsafe { _mm256_store_ps(addr as *mut m256 as *mut f32, a.0) }
}
/// Stores `a` into the referenced `m256i` (aligned 256-bit store; same
/// alignment reasoning as [`store_m256d`]).
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn store_m256i(addr: &mut m256i, a: m256i) {
unsafe { _mm256_store_si256(addr as *mut m256i as *mut __m256i, a.0) }
}
/// Stores the four `f64` lanes of `a` into the array.
///
/// Unaligned store: no alignment requirement on the destination.
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn store_unaligned_m256d(addr: &mut [f64; 4], a: m256d) {
unsafe { _mm256_storeu_pd(addr.as_mut_ptr(), a.0) }
}
/// Stores the eight `f32` lanes of `a` into the array (unaligned store).
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn store_unaligned_m256(addr: &mut [f32; 8], a: m256) {
unsafe { _mm256_storeu_ps(addr.as_mut_ptr(), a.0) }
}
/// Stores all 256 bits of `a` into the byte array (unaligned store).
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn store_unaligned_m256i(addr: &mut [i8; 32], a: m256i) {
unsafe { _mm256_storeu_si256(addr as *mut [i8; 32] as *mut __m256i, a.0) }
}
/// Stores the two 128-bit halves of `a` to separate destinations.
///
/// The upper 128 bits go to `hi_addr`, the lower 128 bits to `lo_addr`;
/// neither destination needs any particular alignment.
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn store_unaligned_hi_lo_m256d(hi_addr: &mut [f64; 2], lo_addr: &mut [f64; 2], a: m256d) {
unsafe { _mm256_storeu2_m128d(hi_addr.as_mut_ptr(), lo_addr.as_mut_ptr(), a.0) }
}
/// Stores the high 128 bits of `a` to `hi_addr` and the low 128 bits to
/// `lo_addr` (unaligned).
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn store_unaligned_hi_lo_m256(hi_addr: &mut [f32; 4], lo_addr: &mut [f32; 4], a: m256) {
unsafe { _mm256_storeu2_m128(hi_addr.as_mut_ptr(), lo_addr.as_mut_ptr(), a.0) }
}
/// Stores the high 128 bits of `a` to `hi_addr` and the low 128 bits to
/// `lo_addr` (unaligned). The `.cast()`s convert `*mut i8` to `*mut __m128i`.
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn store_unaligned_hi_lo_m256i(hi_addr: &mut [i8; 16], lo_addr: &mut [i8; 16], a: m256i) {
unsafe { _mm256_storeu2_m128i(hi_addr.as_mut_ptr().cast(), lo_addr.as_mut_ptr().cast(), a.0) }
}
/// Lanewise `a - b` over the four `f64` lanes.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn sub_m256d(a: m256d, b: m256d) -> m256d {
  // SAFETY: this module is only compiled when AVX is enabled (crate-level cfg).
  let diff = unsafe { _mm256_sub_pd(a.0, b.0) };
  m256d(diff)
}
/// Lanewise `a - b` over the eight `f32` lanes.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn sub_m256(a: m256, b: m256) -> m256 {
  // SAFETY: this module is only compiled when AVX is enabled (crate-level cfg).
  let diff = unsafe { _mm256_sub_ps(a.0, b.0) };
  m256(diff)
}
/// Unpacks and interleaves the *high* `f64` of each 128-bit half of `a`
/// and `b` (classic `unpackhi` pattern, per 128-bit lane).
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn unpack_hi_m256d(a: m256d, b: m256d) -> m256d {
m256d(unsafe { _mm256_unpackhi_pd(a.0, b.0) })
}
/// Unpacks and interleaves the *high* `f32` pairs of each 128-bit half of
/// `a` and `b`.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn unpack_hi_m256(a: m256, b: m256) -> m256 {
m256(unsafe { _mm256_unpackhi_ps(a.0, b.0) })
}
/// Unpacks and interleaves the *low* `f64` of each 128-bit half of `a`
/// and `b`.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn unpack_lo_m256d(a: m256d, b: m256d) -> m256d {
m256d(unsafe { _mm256_unpacklo_pd(a.0, b.0) })
}
/// Unpacks and interleaves the *low* `f32` pairs of each 128-bit half of
/// `a` and `b`.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn unpack_lo_m256(a: m256, b: m256) -> m256 {
m256(unsafe { _mm256_unpacklo_ps(a.0, b.0) })
}
/// Bitwise XOR of all 256 bits, viewed as an `m256d`.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn bitxor_m256d(a: m256d, b: m256d) -> m256d {
  // SAFETY: this module is only compiled when AVX is enabled (crate-level cfg).
  let bits = unsafe { _mm256_xor_pd(a.0, b.0) };
  m256d(bits)
}
/// Bitwise XOR of all 256 bits, viewed as an `m256`.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn bitxor_m256(a: m256, b: m256) -> m256 {
  // SAFETY: this module is only compiled when AVX is enabled (crate-level cfg).
  let bits = unsafe { _mm256_xor_ps(a.0, b.0) };
  m256(bits)
}
/// Widens an `m128d` into an `m256d`, zeroing the new upper 128 bits.
///
/// Unlike the plain `cast` intrinsics (which leave the upper half
/// undefined), the `zext` intrinsics guarantee the upper lane is zero.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn zero_extend_m128d(a: m128d) -> m256d {
m256d(unsafe { _mm256_zextpd128_pd256(a.0) })
}
/// Widens an `m128` into an `m256`, zeroing the new upper 128 bits.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn zero_extend_m128(a: m128) -> m256 {
m256(unsafe { _mm256_zextps128_ps256(a.0) })
}
/// Widens an `m128i` into an `m256i`, zeroing the new upper 128 bits.
#[must_use]
#[inline(always)]
#[cfg_attr(docs_rs, doc(cfg(target_feature = "avx")))]
pub fn zero_extend_m128i(a: m128i) -> m256i {
m256i(unsafe { _mm256_zextsi128_si256(a.0) })
}
// Operator sugar for `m256d`: each binary operator delegates to the
// matching lanewise free function, and every `*Assign` form is written as
// `*self = *self OP rhs` so it can never drift from the binary impl.
impl Add for m256d {
type Output = Self;
#[must_use]
#[inline(always)]
// Lanewise `f64` addition.
fn add(self, rhs: Self) -> Self {
add_m256d(self, rhs)
}
}
impl AddAssign for m256d {
#[inline(always)]
fn add_assign(&mut self, rhs: Self) {
*self = *self + rhs;
}
}
impl BitAnd for m256d {
type Output = Self;
#[must_use]
#[inline(always)]
// Bitwise AND of the raw 256 bits (sign/mask manipulation).
fn bitand(self, rhs: Self) -> Self {
bitand_m256d(self, rhs)
}
}
impl BitAndAssign for m256d {
#[inline(always)]
fn bitand_assign(&mut self, rhs: Self) {
*self = *self & rhs;
}
}
impl BitOr for m256d {
type Output = Self;
#[must_use]
#[inline(always)]
// Bitwise OR of the raw 256 bits.
fn bitor(self, rhs: Self) -> Self {
bitor_m256d(self, rhs)
}
}
impl BitOrAssign for m256d {
#[inline(always)]
fn bitor_assign(&mut self, rhs: Self) {
*self = *self | rhs;
}
}
impl BitXor for m256d {
type Output = Self;
#[must_use]
#[inline(always)]
// Bitwise XOR of the raw 256 bits.
fn bitxor(self, rhs: Self) -> Self {
bitxor_m256d(self, rhs)
}
}
impl BitXorAssign for m256d {
#[inline(always)]
fn bitxor_assign(&mut self, rhs: Self) {
*self = *self ^ rhs;
}
}
impl Div for m256d {
type Output = Self;
#[must_use]
#[inline(always)]
// Lanewise `f64` division.
fn div(self, rhs: Self) -> Self {
div_m256d(self, rhs)
}
}
impl DivAssign for m256d {
#[inline(always)]
fn div_assign(&mut self, rhs: Self) {
*self = *self / rhs;
}
}
impl Mul for m256d {
type Output = Self;
#[must_use]
#[inline(always)]
// Lanewise `f64` multiplication.
fn mul(self, rhs: Self) -> Self {
mul_m256d(self, rhs)
}
}
impl MulAssign for m256d {
#[inline(always)]
fn mul_assign(&mut self, rhs: Self) {
*self = *self * rhs;
}
}
impl Neg for m256d {
type Output = Self;
#[must_use]
#[inline(always)]
fn neg(self) -> Self {
sub_m256d(zeroed_m256d(), self)
}
}
impl Not for m256d {
type Output = Self;
#[must_use]
#[inline(always)]
// Bitwise NOT of all 256 bits.
fn not(self) -> Self {
// `u64::MAX` reinterpreted as `f64` is just an all-ones bit pattern
// (a NaN, but only the bits matter here); XOR with it flips every bit.
let all_bits = set_splat_m256d(f64::from_bits(u64::MAX));
self ^ all_bits
}
}
impl Sub for m256d {
type Output = Self;
#[must_use]
#[inline(always)]
// Lanewise `f64` subtraction.
fn sub(self, rhs: Self) -> Self {
sub_m256d(self, rhs)
}
}
impl SubAssign for m256d {
#[inline(always)]
fn sub_assign(&mut self, rhs: Self) {
*self = *self - rhs;
}
}
impl PartialEq for m256d {
#[must_use]
#[inline(always)]
// True when every lane compares equal.
fn eq(&self, other: &Self) -> bool {
// `_CMP_EQ_OQ` is an ordered, quiet lanewise equality: NaN lanes
// compare unequal, consistent with scalar `f64` semantics.
let mask = m256d(unsafe { _mm256_cmp_pd(self.0, other.0, _CMP_EQ_OQ) });
// All four lane bits of the move-mask must be set.
move_mask_m256d(mask) == 0b1111
}
}
// Operator sugar for `m256`, mirroring the `m256d` impls: binary operators
// delegate to the lanewise free functions; `*Assign` forms reuse them.
impl Add for m256 {
type Output = Self;
#[must_use]
#[inline(always)]
// Lanewise `f32` addition.
fn add(self, rhs: Self) -> Self {
add_m256(self, rhs)
}
}
impl AddAssign for m256 {
#[inline(always)]
fn add_assign(&mut self, rhs: Self) {
*self = *self + rhs;
}
}
impl BitAnd for m256 {
type Output = Self;
#[must_use]
#[inline(always)]
// Bitwise AND of the raw 256 bits (sign/mask manipulation).
fn bitand(self, rhs: Self) -> Self {
bitand_m256(self, rhs)
}
}
impl BitAndAssign for m256 {
#[inline(always)]
fn bitand_assign(&mut self, rhs: Self) {
*self = *self & rhs;
}
}
impl BitOr for m256 {
type Output = Self;
#[must_use]
#[inline(always)]
// Bitwise OR of the raw 256 bits.
fn bitor(self, rhs: Self) -> Self {
bitor_m256(self, rhs)
}
}
impl BitOrAssign for m256 {
#[inline(always)]
fn bitor_assign(&mut self, rhs: Self) {
*self = *self | rhs;
}
}
impl BitXor for m256 {
type Output = Self;
#[must_use]
#[inline(always)]
// Bitwise XOR of the raw 256 bits.
fn bitxor(self, rhs: Self) -> Self {
bitxor_m256(self, rhs)
}
}
impl BitXorAssign for m256 {
#[inline(always)]
fn bitxor_assign(&mut self, rhs: Self) {
*self = *self ^ rhs;
}
}
impl Div for m256 {
type Output = Self;
#[must_use]
#[inline(always)]
// Lanewise `f32` division.
fn div(self, rhs: Self) -> Self {
div_m256(self, rhs)
}
}
impl DivAssign for m256 {
#[inline(always)]
fn div_assign(&mut self, rhs: Self) {
*self = *self / rhs;
}
}
impl Mul for m256 {
type Output = Self;
#[must_use]
#[inline(always)]
// Lanewise `f32` multiplication.
fn mul(self, rhs: Self) -> Self {
mul_m256(self, rhs)
}
}
impl MulAssign for m256 {
#[inline(always)]
fn mul_assign(&mut self, rhs: Self) {
*self = *self * rhs;
}
}
impl Neg for m256 {
type Output = Self;
#[must_use]
#[inline(always)]
fn neg(self) -> Self {
sub_m256(zeroed_m256(), self)
}
}
impl Not for m256 {
type Output = Self;
#[must_use]
#[inline(always)]
// Bitwise NOT of all 256 bits.
fn not(self) -> Self {
// `u32::MAX` reinterpreted as `f32` is just an all-ones bit pattern
// (a NaN, but only the bits matter here); XOR with it flips every bit.
let all_bits = set_splat_m256(f32::from_bits(u32::MAX));
self ^ all_bits
}
}
impl Sub for m256 {
type Output = Self;
#[must_use]
#[inline(always)]
// Lanewise `f32` subtraction.
fn sub(self, rhs: Self) -> Self {
sub_m256(self, rhs)
}
}
impl SubAssign for m256 {
#[inline(always)]
fn sub_assign(&mut self, rhs: Self) {
*self = *self - rhs;
}
}
impl PartialEq for m256 {
#[must_use]
#[inline(always)]
// True when every lane compares equal.
fn eq(&self, other: &Self) -> bool {
// `_CMP_EQ_OQ` is an ordered, quiet lanewise equality: NaN lanes
// compare unequal, consistent with scalar `f32` semantics.
let mask = m256(unsafe { _mm256_cmp_ps(self.0, other.0, _CMP_EQ_OQ) });
// All eight lane bits of the move-mask must be set.
move_mask_m256(mask) == 0b1111_1111
}
}