#![cfg(target_feature="sse")]
#![cfg(target_feature="sse2")]
#![cfg(target_feature="sse3")]
use super::*;
/// # SSE3 Operations
impl m128 {
/// Adds odd lanes (3 and 1) and subtracts even lanes (2 and 0).
///
/// ```txt
/// out[0]= self[0] - rhs[0]
/// out[1]= self[1] + rhs[1]
/// out[2]= self[2] - rhs[2]
/// out[3]= self[3] + rhs[3]
/// ```
#[inline(always)]
pub fn add_sub(self, rhs: Self) -> Self {
Self(unsafe { _mm_addsub_ps(self.0, rhs.0) })
}
/// Horizontal add both `self` and `rhs`, then pack together.
///
/// ```txt
/// out[0]= self[0] + self[1]
/// out[1]= self[2] + self[3]
/// out[2]= rhs[0] + rhs[1]
/// out[3]= rhs[2] + rhs[3]
/// ```
#[inline(always)]
pub fn horizontal_add(self, rhs: Self) -> Self {
Self(unsafe { _mm_hadd_ps(self.0, rhs.0) })
}
/// Horizontal subtract both `self` and `rhs`, then pack together.
///
/// ```txt
/// out[0]= self[0] - self[1]
/// out[1]= self[2] - self[3]
/// out[2]= rhs[0] - rhs[1]
/// out[3]= rhs[2] - rhs[3]
/// ```
#[inline(always)]
pub fn horizontal_sub(self, rhs: Self) -> Self {
Self(unsafe { _mm_hsub_ps(self.0, rhs.0) })
}
/// Duplicate odd indexed lanes into a new `m128`.
///
/// ```txt
/// out[0]= self[1]
/// out[1]= self[1]
/// out[2]= self[3]
/// out[3]= self[3]
/// ```
#[inline(always)]
pub fn duplicate_odd(self) -> Self {
Self(unsafe { _mm_movehdup_ps(self.0) })
}
/// Duplicate even indexed lanes into a new `m128`.
///
/// ```txt
/// out[0]= self[0]
/// out[1]= self[0]
/// out[2]= self[2]
/// out[3]= self[2]
/// ```
#[inline(always)]
pub fn duplicate_even(self) -> Self {
Self(unsafe { _mm_moveldup_ps(self.0) })
}
}
/// # SSE3 Operations
impl m128d {
/// Adds the high lane (1) and subtracts the low lane (0).
///
/// ```txt
/// out[0]= self[0] - rhs[0]
/// out[1]= self[1] + rhs[1]
/// ```
#[inline(always)]
pub fn add_sub(self, rhs: Self) -> Self {
Self(unsafe { _mm_addsub_pd(self.0, rhs.0) })
}
/// Horizontal add both `self` and `rhs`, then pack together.
///
/// ```txt
/// out[0]= self[0] + self[1]
/// out[1]= rhs[0] + rhs[1]
/// ```
#[inline(always)]
pub fn horizontal_add(self, rhs: Self) -> Self {
Self(unsafe { _mm_hadd_pd(self.0, rhs.0) })
}
/// Horizontal subtract both `self` and `rhs`, then pack together.
///
/// ```txt
/// out[0]= self[0] - self[1]
/// out[1]= rhs[0] - rhs[1]
/// ```
#[inline(always)]
pub fn horizontal_sub(self, rhs: Self) -> Self {
Self(unsafe { _mm_hsub_pd(self.0, rhs.0) })
}
/// Load the given `f64` address, duplicating it into both lanes.
#[inline(always)]
#[allow(clippy::trivially_copy_pass_by_ref)]
pub fn load_splat(addr: &f64) -> Self {
Self(unsafe { _mm_loaddup_pd(addr) })
}
/// Duplicate the low lane of `self` into both lanes of a new `m128d`.
///
/// ```txt
/// out[0]= self[0]
/// out[1]= self[0]
/// ```
#[inline(always)]
pub fn duplicate_low(self) -> Self {
Self(unsafe { _mm_movedup_pd(self.0) })
}
}
/// # SSE3 Operations
impl m128i {
/// Loads 128-bits of integer data without alignment requirements.
///
/// This can perform faster than [`m128i::load_unaligned`] if the data would
/// cross a cache line boundary.
#[inline(always)]
pub fn load_quick_unaligned(addr: *const i128) -> Self {
#[allow(clippy::cast_ptr_alignment)]
Self(unsafe { _mm_lddqu_si128(addr as *const _) })
}
}