#![allow(unused_variables)]
use conversions::Convert128;
use simd::x86::sse2::{Sse2I8x16, Sse2U8x16, Sse2I16x8, Sse2U16x8,
                      Sse2I32x4, Sse2U32x4, Sse2F32x4, Sse2F64x2, bool64fx2};
use __m128;
use __m128i;
use __m128d;
use simd::{i8x16, i16x8, i32x4};
use std::ptr::copy_nonoverlapping;
use std::mem::{transmute, uninitialized};
use simd_shuffle2;
use simd_shuffle4;
use simd_shuffle8;
use simd_shuffle16;
#[allow(improper_ctypes)]
extern {
#[link_name = "llvm.x86.sse2.sqrt.sd"]
pub fn sse2_sqrt_sd(a: __m128d) -> __m128d;
#[link_name = "llvm.x86.sse2.cvtsd2si"]
pub fn sse2_cvtsd2si(a: __m128d) -> i32;
#[link_name = "llvm.x86.sse2.cvttsd2si"]
pub fn sse2_cvttsd2si(a: __m128d) -> i32;
#[link_name = "llvm.x86.sse2.cvtsd2si64"]
pub fn sse2_cvtsd2si64(a: __m128d) -> i64;
#[link_name = "llvm.x86.sse2.cvttsd2si64"]
pub fn sse2_cvttsd2si64(a: __m128d) -> i64;
#[link_name = "llvm.x86.sse2.cvtpd2ps"]
pub fn sse2_cvtpd2ps(a: __m128d) -> __m128;
#[link_name = "llvm.x86.sse2.cvtps2dq"]
pub fn sse2_cvtps2dq(a: __m128) -> i32x4;
#[link_name = "llvm.x86.sse2.cvttps2dq"]
pub fn sse2_cvttps2dq(a: __m128) -> i32x4;
#[link_name = "llvm.x86.sse2.mfence"]
pub fn sse2_mfence() -> ();
#[link_name = "llvm.x86.sse2.min.sd"]
pub fn sse2_min_sd(a: __m128d, b: __m128d) -> __m128d;
#[link_name = "llvm.x86.sse2.max.sd"]
pub fn sse2_max_sd(a: __m128d, b: __m128d) -> __m128d;
#[link_name = "llvm.x86.sse2.cmp.sd"]
pub fn sse2_cmp_sd(a: __m128d, b: __m128d, c: i8) -> __m128d;
#[link_name = "llvm.x86.sse2.comieq.sd"]
pub fn sse2_comieq_sd(a: __m128d, b: __m128d) -> i32;
#[link_name = "llvm.x86.sse2.comilt.sd"]
pub fn sse2_comilt_sd(a: __m128d, b: __m128d) -> i32;
#[link_name = "llvm.x86.sse2.comile.sd"]
pub fn sse2_comile_sd(a: __m128d, b: __m128d) -> i32;
#[link_name = "llvm.x86.sse2.comigt.sd"]
pub fn sse2_comigt_sd(a: __m128d, b: __m128d) -> i32;
#[link_name = "llvm.x86.sse2.comige.sd"]
pub fn sse2_comige_sd(a: __m128d, b: __m128d) -> i32;
#[link_name = "llvm.x86.sse2.comineq.sd"]
pub fn sse2_comineq_sd(a: __m128d, b: __m128d) -> i32;
#[link_name = "llvm.x86.sse2.ucomieq.sd"]
pub fn sse2_ucomieq_sd(a: __m128d, b: __m128d) -> i32;
#[link_name = "llvm.x86.sse2.ucomilt.sd"]
pub fn sse2_ucomilt_sd(a: __m128d, b: __m128d) -> i32;
#[link_name = "llvm.x86.sse2.ucomile.sd"]
pub fn sse2_ucomile_sd(a: __m128d, b: __m128d) -> i32;
#[link_name = "llvm.x86.sse2.ucomigt.sd"]
pub fn sse2_ucomigt_sd(a: __m128d, b: __m128d) -> i32;
#[link_name = "llvm.x86.sse2.ucomige.sd"]
pub fn sse2_ucomige_sd(a: __m128d, b: __m128d) -> i32;
#[link_name = "llvm.x86.sse2.ucomineq.sd"]
pub fn sse2_ucomineq_sd(a: __m128d, b: __m128d) -> i32;
#[link_name = "llvm.x86.sse2.cvtsi2sd"]
pub fn sse2_cvtsi2sd(a: __m128d, b: i32) -> __m128d;
#[link_name = "llvm.x86.sse2.cvtsi642sd"]
pub fn sse2_cvtsi642sd(a: __m128d, b: i64) -> __m128d;
#[link_name = "llvm.x86.sse2.pslli.w"]
pub fn sse2_pslli_w(a: i16x8, b: i32) -> i16x8;
#[link_name = "llvm.x86.sse2.pslli.d"]
pub fn sse2_pslli_d(a: i32x4, b: i32) -> i32x4;
#[link_name = "llvm.x86.sse2.pslli.q"]
pub fn sse2_pslli_q(a: __m128i, b: i32) -> __m128i;
#[link_name = "llvm.x86.sse2.psrli.w"]
pub fn sse2_psrli_w(a: i16x8, b: i32) -> i16x8;
#[link_name = "llvm.x86.sse2.psrli.d"]
pub fn sse2_psrli_d(a: i32x4, b: i32) -> i32x4;
#[link_name = "llvm.x86.sse2.psrli.q"]
pub fn sse2_psrli_q(a: __m128i, b: i32) -> __m128i;
#[link_name = "llvm.x86.sse2.psrai.w"]
pub fn sse2_psrai_w(a: i16x8, b: i32) -> i16x8;
#[link_name = "llvm.x86.sse2.psrai.d"]
pub fn sse2_psrai_d(a: i32x4, b: i32) -> i32x4;
#[link_name = "llvm.x86.sse2.psll.dq"]
pub fn sse2_psll_dq(a: __m128i, b: i32) -> __m128i;
#[link_name = "llvm.x86.sse2.psrl.dq"]
pub fn sse2_psrl_dq(a: __m128i, b: i32) -> __m128i;
#[link_name = "llvm.x86.sse2.psll.w"]
pub fn sse2_psll_w(a: i16x8, b: i16x8) -> i16x8;
#[link_name = "llvm.x86.sse2.psll.d"]
pub fn sse2_psll_d(a: i32x4, b: i32x4) -> i32x4;
#[link_name = "llvm.x86.sse2.psll.q"]
pub fn sse2_psll_q(a: __m128i, b: __m128i) -> __m128i;
#[link_name = "llvm.x86.sse2.psrl.w"]
pub fn sse2_psrl_w(a: i16x8, b: i16x8) -> i16x8;
#[link_name = "llvm.x86.sse2.psrl.d"]
pub fn sse2_psrl_d(a: i32x4, b: i32x4) -> i32x4;
#[link_name = "llvm.x86.sse2.psrl.q"]
pub fn sse2_psrl_q(a: __m128i, b: __m128i) -> __m128i;
#[link_name = "llvm.x86.sse2.psra.w"]
pub fn sse2_psra_w(a: i16x8, b: i16x8) -> i16x8;
#[link_name = "llvm.x86.sse2.psra.d"]
pub fn sse2_psra_d(a: i32x4, b: i32x4) -> i32x4;
#[link_name = "llvm.x86.sse2.clflush"]
pub fn sse2_clflush(a: *mut i8) -> ();
#[link_name = "llvm.x86.sse2.cvtdq2pd"]
pub fn sse2_cvtdq2pd(a: i32x4) -> __m128d;
#[link_name = "llvm.x86.sse2.cvtpd2dq"]
pub fn sse2_cvtpd2dq(a: __m128d) -> i32x4;
#[link_name = "llvm.x86.sse2.cvttpd2dq"]
pub fn sse2_cvttpd2dq(a: __m128d) -> i32x4;
#[link_name = "llvm.x86.sse2.cvtsd2ss"]
pub fn sse2_cvtsd2ss(a: __m128, b: __m128d) -> __m128;
#[link_name = "llvm.x86.sse2.cvtss2sd"]
pub fn sse2_cvtss2sd(a: __m128d, b: __m128) -> __m128d;
#[link_name = "llvm.x86.sse2.lfence"]
pub fn sse2_lfence() -> ();
#[link_name = "llvm.x86.sse2.pause"]
pub fn sse2_pause() -> ();
#[link_name = "llvm.x86.sse2.maskmov.dqu"]
pub fn sse2_maskmov_dqu(a: i8x16, b: i8x16, c: *mut i8) -> ();
}
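// The bindings above map straight onto LLVM's SSE2 intrinsics via
// `link_name`; they are only sound on CPUs with SSE2 (always present on
// x86_64). A minimal test-only sketch of calling one directly, assuming
// SSE2 hardware on the test machine:
#[cfg(test)]
mod llvm_binding_sketch {
    use super::*;

    #[test]
    fn sqrt_sd_touches_only_lane_0() {
        let a = __m128d::new(4.0, 9.0);
        let r = unsafe { sse2_sqrt_sd(a) };
        // sqrtsd takes the square root of lane 0 and passes lane 1 through.
        assert_eq!(r.extract(0), 2.0);
        assert_eq!(r.extract(1), 9.0);
    }
}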
fn convert_bool64fx2_to_m128d(a: bool64fx2) -> __m128d {
unsafe { transmute(a) }
}
#[inline]
pub fn _mm_add_epi16(a: __m128i, b: __m128i) -> __m128i {
(a.as_i16x8() + b.as_i16x8()).as_i64x2()
}
#[inline]
pub fn _mm_add_epi32(a: __m128i, b: __m128i) -> __m128i {
(a.as_i32x4() + b.as_i32x4()).as_i64x2()
}
#[inline]
pub fn _mm_add_epi64(a: __m128i, b: __m128i) -> __m128i {
a + b
}
#[inline]
pub fn _mm_add_epi8(a: __m128i, b: __m128i) -> __m128i {
(a.as_i8x16() + b.as_i8x16()).as_i64x2()
}
#[inline]
pub fn _mm_add_pd(a: __m128d, b: __m128d) -> __m128d {
a + b
}
#[inline]
pub fn _mm_add_sd(a: __m128d, b: __m128d) -> __m128d {
a.replace(0, a.extract(0) + b.extract(0))
}
#[inline]
pub fn _mm_adds_epi16(a: __m128i, b: __m128i) -> __m128i {
a.as_i16x8().adds(b.as_i16x8()).as_i64x2()
}
#[inline]
pub fn _mm_adds_epi8(a: __m128i, b: __m128i) -> __m128i {
a.as_i8x16().adds(b.as_i8x16()).as_i64x2()
}
#[inline]
pub fn _mm_adds_epu16(a: __m128i, b: __m128i) -> __m128i {
a.as_u16x8().adds(b.as_u16x8()).as_i64x2()
}
#[inline]
pub fn _mm_adds_epu8(a: __m128i, b: __m128i) -> __m128i {
a.as_u8x16().adds(b.as_u8x16()).as_i64x2()
}
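// The `adds`/`subs` forms saturate instead of wrapping: pushing an i8 lane
// past 127 clamps it at 127. A small test-only sketch:
#[cfg(test)]
mod saturating_add_sketch {
    use super::*;

    #[test]
    fn adds_epi8_clamps_at_i8_max() {
        let a = _mm_set1_epi8(127);
        let b = _mm_set1_epi8(1);
        // Wrapping addition would give -128; saturating addition stays at 127.
        assert_eq!(_mm_adds_epi8(a, b).as_i8x16().extract(0), 127);
    }
}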
#[inline]
pub fn _mm_and_pd(a: __m128d, b: __m128d) -> __m128d {
(a.as_i64x2() & b.as_i64x2()).as_f64x2()
}
#[inline]
pub fn _mm_and_si128(a: __m128i, b: __m128i) -> __m128i {
a & b
}
#[inline]
pub fn _mm_andnot_pd(a: __m128d, b: __m128d) -> __m128d {
((a.as_i64x2() ^ __m128i::splat(!0)) & b.as_i64x2()).as_f64x2()
}
#[inline]
pub fn _mm_andnot_si128(a: __m128i, b: __m128i) -> __m128i {
(a ^ __m128i::splat(!0)) & b
}
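// `andnot` computes `(!a) & b`, which combines with `and` and `or` into the
// classic SSE2 branchless select `(mask & x) | (!mask & y)`. A sketch, with
// the mask values chosen purely for illustration:
#[cfg(test)]
mod andnot_select_sketch {
    use super::*;

    #[test]
    fn select_with_and_andnot_or() {
        let mask = _mm_set_epi32(!0, 0, !0, 0); // per-lane all-ones / all-zeros
        let x = _mm_set1_epi32(7);
        let y = _mm_set1_epi32(9);
        let r = _mm_or_si128(_mm_and_si128(mask, x), _mm_andnot_si128(mask, y));
        // Lane 0 has a zero mask, so it takes its value from y.
        assert_eq!(_mm_cvtsi128_si32(r), 9);
    }
}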
#[inline]
pub fn _mm_avg_epu16(a: __m128i, b: __m128i) -> __m128i {
a.as_u16x8().avg(b.as_u16x8()).as_i64x2()
}
#[inline]
pub fn _mm_avg_epu8(a: __m128i, b: __m128i) -> __m128i {
a.as_u8x16().avg(b.as_u8x16()).as_i64x2()
}
#[inline]
pub fn _mm_bslli_si128(a: __m128i, imm8: i32) -> __m128i {
_mm_slli_si128(a, imm8)
}
#[inline]
pub fn _mm_bsrli_si128(a: __m128i, imm8: i32) -> __m128i {
_mm_srli_si128(a, imm8)
}
#[inline]
pub fn _mm_castpd_ps(a: __m128d) -> __m128 {
a.as_f32x4()
}
#[inline]
pub fn _mm_castpd_si128(a: __m128d) -> __m128i {
a.as_i64x2()
}
#[inline]
pub fn _mm_castps_pd(a: __m128) -> __m128d {
a.as_f64x2()
}
#[inline]
pub fn _mm_castps_si128(a: __m128) -> __m128i {
a.as_i64x2()
}
#[inline]
pub fn _mm_castsi128_pd(a: __m128i) -> __m128d {
a.as_f64x2()
}
#[inline]
pub fn _mm_castsi128_ps(a: __m128i) -> __m128 {
a.as_f32x4()
}
#[inline]
pub fn _mm_clflush(p: *const u8) {
unsafe { sse2_clflush(p as *mut i8) }
}
#[inline]
pub fn _mm_cmpeq_epi16(a: __m128i, b: __m128i) -> __m128i {
a.as_i16x8().eq(b.as_i16x8()).to_repr().as_i64x2()
}
#[inline]
pub fn _mm_cmpeq_epi32(a: __m128i, b: __m128i) -> __m128i {
a.as_i32x4().eq(b.as_i32x4()).to_repr().as_i64x2()
}
#[inline]
pub fn _mm_cmpeq_epi8(a: __m128i, b: __m128i) -> __m128i {
a.as_i8x16().eq(b.as_i8x16()).to_repr().as_i64x2()
}
#[inline]
pub fn _mm_cmpeq_pd(a: __m128d, b: __m128d) -> __m128d {
convert_bool64fx2_to_m128d(a.eq(b))
}
#[inline]
pub fn _mm_cmpeq_sd(a: __m128d, b: __m128d) -> __m128d {
unsafe { sse2_cmp_sd(a, b, 0) }
}
#[inline]
pub fn _mm_cmpge_pd(a: __m128d, b: __m128d) -> __m128d {
convert_bool64fx2_to_m128d(a.ge(b))
}
#[inline]
pub fn _mm_cmpge_sd(a: __m128d, b: __m128d) -> __m128d {
_mm_move_sd(a, _mm_cmple_sd(b, a))
}
#[inline]
pub fn _mm_cmpgt_epi16(a: __m128i, b: __m128i) -> __m128i {
a.as_i16x8().gt(b.as_i16x8()).to_repr().as_i64x2()
}
#[inline]
pub fn _mm_cmpgt_epi32(a: __m128i, b: __m128i) -> __m128i {
a.as_i32x4().gt(b.as_i32x4()).to_repr().as_i64x2()
}
#[inline]
pub fn _mm_cmpgt_epi8(a: __m128i, b: __m128i) -> __m128i {
a.as_i8x16().gt(b.as_i8x16()).to_repr().as_i64x2()
}
#[inline]
pub fn _mm_cmpgt_pd(a: __m128d, b: __m128d) -> __m128d {
convert_bool64fx2_to_m128d(a.gt(b))
}
#[inline]
pub fn _mm_cmpgt_sd(a: __m128d, b: __m128d) -> __m128d {
_mm_move_sd(a, _mm_cmplt_sd(b, a))
}
#[inline]
pub fn _mm_cmple_pd(a: __m128d, b: __m128d) -> __m128d {
convert_bool64fx2_to_m128d(a.le(b))
}
#[inline]
pub fn _mm_cmple_sd(a: __m128d, b: __m128d) -> __m128d {
unsafe { sse2_cmp_sd(a, b, 2) }
}
#[inline]
pub fn _mm_cmplt_epi16(a: __m128i, b: __m128i) -> __m128i {
_mm_cmpgt_epi16(b, a)
}
#[inline]
pub fn _mm_cmplt_epi32(a: __m128i, b: __m128i) -> __m128i {
_mm_cmpgt_epi32(b, a)
}
#[inline]
pub fn _mm_cmplt_epi8(a: __m128i, b: __m128i) -> __m128i {
_mm_cmpgt_epi8(b, a)
}
#[inline]
pub fn _mm_cmplt_pd(a: __m128d, b: __m128d) -> __m128d {
convert_bool64fx2_to_m128d(a.lt(b))
}
#[inline]
pub fn _mm_cmplt_sd(a: __m128d, b: __m128d) -> __m128d {
unsafe { sse2_cmp_sd(a, b, 1) }
}
#[inline]
pub fn _mm_cmpneq_pd(a: __m128d, b: __m128d) -> __m128d {
convert_bool64fx2_to_m128d(a.ne(b))
}
#[inline]
pub fn _mm_cmpneq_sd(a: __m128d, b: __m128d) -> __m128d {
unsafe { sse2_cmp_sd(a, b, 4) }
}
#[inline]
pub fn _mm_cmpnge_pd(a: __m128d, b: __m128d) -> __m128d {
convert_bool64fx2_to_m128d(a.lt(b) | (a.ne(a) | b.ne(b)))
}
#[inline]
pub fn _mm_cmpnge_sd(a: __m128d, b: __m128d) -> __m128d {
_mm_move_sd(a, _mm_cmpnle_sd(b, a))
}
#[inline]
pub fn _mm_cmpngt_pd(a: __m128d, b: __m128d) -> __m128d {
convert_bool64fx2_to_m128d(a.le(b) | (a.ne(a) | b.ne(b)))
}
#[inline]
pub fn _mm_cmpngt_sd(a: __m128d, b: __m128d) -> __m128d {
_mm_move_sd(a, _mm_cmpnlt_sd(b, a))
}
#[inline]
pub fn _mm_cmpnle_pd(a: __m128d, b: __m128d) -> __m128d {
convert_bool64fx2_to_m128d(a.gt(b) | (a.ne(a) | b.ne(b)))
}
#[inline]
pub fn _mm_cmpnle_sd(a: __m128d, b: __m128d) -> __m128d {
unsafe { sse2_cmp_sd(a, b, 6) }
}
#[inline]
pub fn _mm_cmpnlt_pd(a: __m128d, b: __m128d) -> __m128d {
convert_bool64fx2_to_m128d(a.ge(b) | (a.ne(a) | b.ne(b)))
}
#[inline]
pub fn _mm_cmpnlt_sd(a: __m128d, b: __m128d) -> __m128d {
unsafe { sse2_cmp_sd(a, b, 5) }
}
#[inline]
pub fn _mm_cmpord_pd(a: __m128d, b: __m128d) -> __m128d {
convert_bool64fx2_to_m128d(a.eq(a) & b.eq(b))
}
#[inline]
pub fn _mm_cmpord_sd(a: __m128d, b: __m128d) -> __m128d {
unsafe { sse2_cmp_sd(a, b, 7) }
}
#[inline]
pub fn _mm_cmpunord_pd(a: __m128d, b: __m128d) -> __m128d {
convert_bool64fx2_to_m128d(a.ne(a) | b.ne(b))
}
#[inline]
pub fn _mm_cmpunord_sd(a: __m128d, b: __m128d) -> __m128d {
unsafe { sse2_cmp_sd(a, b, 3) }
}
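// The scalar compares above return a mask in lane 0 (all ones for true, all
// zeros for false) and pass lane 1 of `a` through unchanged, per the CMPSD
// immediate table (0 = eq, 1 = lt, 2 = le, 3 = unord, 4 = neq, 5 = nlt,
// 6 = nle, 7 = ord). A test-only sketch:
#[cfg(test)]
mod cmp_sd_mask_sketch {
    use super::*;

    #[test]
    fn cmplt_sd_masks_lane_0_only() {
        let a = __m128d::new(1.0, 5.0);
        let b = __m128d::new(2.0, -1.0);
        let r = _mm_cmplt_sd(a, b);
        // 1.0 < 2.0 holds, so lane 0 is all ones (its sign bit shows up in
        // the movemask); lane 1 is copied from a.
        assert_eq!(_mm_movemask_pd(r) & 1, 1);
        assert_eq!(r.extract(1), 5.0);
    }
}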
#[inline]
pub fn _mm_comieq_sd(a: __m128d, b: __m128d) -> i32 {
unsafe { sse2_comieq_sd(a, b) }
}
#[inline]
pub fn _mm_comige_sd(a: __m128d, b: __m128d) -> i32 {
unsafe { sse2_comige_sd(a, b) }
}
#[inline]
pub fn _mm_comigt_sd(a: __m128d, b: __m128d) -> i32 {
unsafe { sse2_comigt_sd(a, b) }
}
#[inline]
pub fn _mm_comile_sd(a: __m128d, b: __m128d) -> i32 {
unsafe { sse2_comile_sd(a, b) }
}
#[inline]
pub fn _mm_comilt_sd(a: __m128d, b: __m128d) -> i32 {
unsafe { sse2_comilt_sd(a, b) }
}
#[inline]
pub fn _mm_comineq_sd(a: __m128d, b: __m128d) -> i32 {
unsafe { sse2_comineq_sd(a, b) }
}
#[inline]
pub fn _mm_cvtepi32_pd(a: __m128i) -> __m128d {
unsafe { sse2_cvtdq2pd(a.as_i32x4()) }
}
#[inline]
pub fn _mm_cvtepi32_ps(a: __m128i) -> __m128 {
a.as_i32x4().to_f32()
}
#[inline]
pub fn _mm_cvtpd_epi32(a: __m128d) -> __m128i {
unsafe { sse2_cvtpd2dq(a).as_i64x2() }
}
#[inline]
pub fn _mm_cvtpd_ps(a: __m128d) -> __m128 {
unsafe { sse2_cvtpd2ps(a) }
}
#[inline]
pub fn _mm_cvtps_epi32(a: __m128) -> __m128i {
unsafe { sse2_cvtps2dq(a).as_i64x2() }
}
#[inline]
pub fn _mm_cvtps_pd(a: __m128) -> __m128d {
a.to_f64()
}
#[inline]
pub fn _mm_cvtsd_f64(a: __m128d) -> f64 {
a.extract(0)
}
#[inline]
pub fn _mm_cvtsd_si32(a: __m128d) -> i32 {
unsafe { sse2_cvtsd2si(a) }
}
#[inline]
#[cfg(target_pointer_width = "64")]
pub fn _mm_cvtsd_si64(a: __m128d) -> i64 {
unsafe { sse2_cvtsd2si64(a) }
}
#[inline]
#[cfg(target_pointer_width = "64")]
pub fn _mm_cvtsd_si64x(a: __m128d) -> i64 {
_mm_cvtsd_si64(a)
}
#[inline]
pub fn _mm_cvtsd_ss(a: __m128, b: __m128d) -> __m128 {
unsafe { sse2_cvtsd2ss(a, b) }
}
#[inline]
pub fn _mm_cvtsi128_si32(a: __m128i) -> i32 {
a.as_i32x4().extract(0)
}
#[inline]
pub fn _mm_cvtsi128_si64(a: __m128i) -> i64 {
a.extract(0)
}
#[inline]
pub fn _mm_cvtsi128_si64x(a: __m128i) -> i64 {
_mm_cvtsi128_si64(a)
}
#[inline]
pub fn _mm_cvtsi32_sd(a: __m128d, b: i32) -> __m128d {
unsafe { sse2_cvtsi2sd(a, b) }
}
#[inline]
pub fn _mm_cvtsi32_si128(a: i32) -> __m128i {
i32x4::new(a, 0, 0, 0).as_i64x2()
}
#[inline]
#[cfg(target_pointer_width = "64")]
pub fn _mm_cvtsi64_sd(a: __m128d, b: i64) -> __m128d {
unsafe { sse2_cvtsi642sd(a, b) }
}
#[inline]
pub fn _mm_cvtsi64_si128(a: i64) -> __m128i {
__m128i::new(a, 0)
}
#[inline]
#[cfg(target_pointer_width = "64")]
pub fn _mm_cvtsi64x_sd(a: __m128d, b: i64) -> __m128d {
_mm_cvtsi64_sd(a, b)
}
#[inline]
pub fn _mm_cvtsi64x_si128(a: i64) -> __m128i {
_mm_cvtsi64_si128(a)
}
#[inline]
pub fn _mm_cvtss_sd(a: __m128d, b: __m128) -> __m128d {
unsafe { sse2_cvtss2sd(a, b) }
}
#[inline]
pub fn _mm_cvttpd_epi32(a: __m128d) -> __m128i {
unsafe { sse2_cvttpd2dq(a).as_i64x2() }
}
#[inline]
pub fn _mm_cvttps_epi32(a: __m128) -> __m128i {
unsafe { sse2_cvttps2dq(a).as_i64x2() }
}
#[inline]
pub fn _mm_cvttsd_si32(a: __m128d) -> i32 {
unsafe { sse2_cvttsd2si(a) }
}
#[inline]
#[cfg(target_pointer_width = "64")]
pub fn _mm_cvttsd_si64(a: __m128d) -> i64 {
unsafe { sse2_cvttsd2si64(a) }
}
#[inline]
#[cfg(target_pointer_width = "64")]
pub fn _mm_cvttsd_si64x(a: __m128d) -> i64 {
_mm_cvttsd_si64(a)
}
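// The `cvt` conversions round with the current MXCSR rounding mode (round
// to nearest even by default), while the `cvtt` forms truncate toward zero.
// A sketch assuming the default rounding mode:
#[cfg(test)]
mod cvt_vs_cvtt_sketch {
    use super::*;

    #[test]
    fn round_versus_truncate() {
        let v = _mm_set_sd(2.75);
        assert_eq!(_mm_cvtsd_si32(v), 3); // rounds to nearest
        assert_eq!(_mm_cvttsd_si32(v), 2); // truncates toward zero
    }
}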
#[inline]
pub fn _mm_div_pd(a: __m128d, b: __m128d) -> __m128d {
a / b
}
#[inline]
pub fn _mm_div_sd(a: __m128d, b: __m128d) -> __m128d {
a.replace(0, a.extract(0) / b.extract(0))
}
#[inline]
pub fn _mm_extract_epi16(a: __m128i, imm8: i32) -> i32 {
// pextrw zero-extends the selected 16-bit lane into the i32 result.
(a.as_i16x8().extract(imm8 as u32) as u16) as i32
}
#[inline]
pub fn _mm_insert_epi16(a: __m128i, i: i32, imm8: i32) -> __m128i {
a.as_i16x8().replace(imm8 as u32, i as i16).as_i64x2()
}
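// `extract`/`insert` address 16-bit lanes by index; inserting into a lane
// and extracting it again round-trips the value. A test-only sketch:
#[cfg(test)]
mod extract_insert_sketch {
    use super::*;

    #[test]
    fn insert_then_extract_lane_3() {
        let v = _mm_insert_epi16(_mm_setzero_si128(), 42, 3);
        assert_eq!(_mm_extract_epi16(v, 3), 42);
    }
}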
#[inline]
pub fn _mm_lfence() {
unsafe { sse2_lfence() }
}
#[inline]
pub unsafe fn _mm_load_pd(mem_addr: *const f64) -> __m128d {
*(mem_addr as *const __m128d)
}
#[inline]
pub unsafe fn _mm_load_pd1(mem_addr: *const f64) -> __m128d {
_mm_load1_pd(mem_addr)
}
#[inline]
pub unsafe fn _mm_load_sd(mem_addr: *const f64) -> __m128d {
let mut r: f64 = uninitialized();
copy_nonoverlapping(mem_addr as *const u8, &mut r as *mut f64 as *mut u8, 8);
__m128d::new(r, 0.0)
}
#[inline]
pub unsafe fn _mm_load_si128(mem_addr: *const __m128i) -> __m128i {
*mem_addr
}
#[inline]
pub unsafe fn _mm_load1_pd(mem_addr: *const f64) -> __m128d {
let mut r: f64 = uninitialized();
copy_nonoverlapping(mem_addr as *const u8, &mut r as *mut f64 as *mut u8, 8);
__m128d::splat(r)
}
#[inline]
pub unsafe fn _mm_loadh_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
let mut r: f64 = uninitialized();
copy_nonoverlapping(mem_addr as *const u8, &mut r as *mut f64 as *mut u8, 8);
__m128d::new(a.extract(0), r)
}
#[inline]
pub unsafe fn _mm_loadl_epi64(mem_addr: *const __m128i) -> __m128i {
let mut r: i64 = uninitialized();
copy_nonoverlapping(mem_addr as *const u8, &mut r as *mut i64 as *mut u8, 8);
__m128i::new(r, 0)
}
#[inline]
pub unsafe fn _mm_loadl_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
let mut r: f64 = uninitialized();
copy_nonoverlapping(mem_addr as *const u8, &mut r as *mut f64 as *mut u8, 8);
// loadl replaces only the low lane; the high lane is carried over from a.
__m128d::new(r, a.extract(1))
}
#[inline]
pub unsafe fn _mm_loadr_pd(mem_addr: *const f64) -> __m128d {
let r = _mm_load_pd(mem_addr);
simd_shuffle2(r, r, [1, 0])
}
#[inline]
pub unsafe fn _mm_loadu_pd(mem_addr: *const f64) -> __m128d {
let mut r: __m128d = uninitialized();
copy_nonoverlapping(mem_addr as *const u8, &mut r as *mut __m128d as *mut u8, 16);
r
}
#[inline]
pub unsafe fn _mm_loadu_si128(mem_addr: *const __m128i) -> __m128i {
let mut r: __m128i = uninitialized();
copy_nonoverlapping(mem_addr as *const u8, &mut r as *mut __m128i as *mut u8, 16);
r
}
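// The plain `load` requires a 16-byte-aligned address, while `loadu` does
// not, so reading from an arbitrary f64 array goes through `loadu`. Sketch:
#[cfg(test)]
mod loadu_sketch {
    use super::*;

    #[test]
    fn loadu_from_a_plain_array() {
        let data = [1.5f64, -2.5];
        let v = unsafe { _mm_loadu_pd(data.as_ptr()) };
        assert_eq!(v.extract(0), 1.5);
        assert_eq!(v.extract(1), -2.5);
    }
}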
#[inline]
pub fn _mm_madd_epi16(a: __m128i, b: __m128i) -> __m128i {
a.as_i16x8().madd(b.as_i16x8()).as_i64x2()
}
#[inline]
pub unsafe fn _mm_maskmoveu_si128(a: __m128i, mask: __m128i, mem_addr: *mut i8) {
sse2_maskmov_dqu(a.as_i8x16(), mask.as_i8x16(), mem_addr)
}
#[inline]
pub fn _mm_max_epi16(a: __m128i, b: __m128i) -> __m128i {
a.as_i16x8().max(b.as_i16x8()).as_i64x2()
}
#[inline]
pub fn _mm_max_epu8(a: __m128i, b: __m128i) -> __m128i {
a.as_u8x16().max(b.as_u8x16()).as_i64x2()
}
#[inline]
pub fn _mm_max_pd(a: __m128d, b: __m128d) -> __m128d {
a.max(b)
}
#[inline]
pub fn _mm_max_sd(a: __m128d, b: __m128d) -> __m128d {
unsafe { sse2_max_sd(a, b) }
}
#[inline]
pub fn _mm_mfence() {
unsafe { sse2_mfence() }
}
#[inline]
pub fn _mm_min_epi16(a: __m128i, b: __m128i) -> __m128i {
a.as_i16x8().min(b.as_i16x8()).as_i64x2()
}
#[inline]
pub fn _mm_min_epu8(a: __m128i, b: __m128i) -> __m128i {
a.as_u8x16().min(b.as_u8x16()).as_i64x2()
}
#[inline]
pub fn _mm_min_pd(a: __m128d, b: __m128d) -> __m128d {
a.min(b)
}
#[inline]
pub fn _mm_min_sd(a: __m128d, b: __m128d) -> __m128d {
unsafe { sse2_min_sd(a, b) }
}
#[inline]
pub fn _mm_move_epi64(a: __m128i) -> __m128i {
__m128i::new(a.extract(0), 0)
}
#[inline]
pub fn _mm_move_sd(a: __m128d, b: __m128d) -> __m128d {
a.replace(0, b.extract(0))
}
#[inline]
pub fn _mm_movemask_epi8(a: __m128i) -> i32 {
a.as_u8x16().move_mask() as i32
}
#[inline]
pub fn _mm_movemask_pd(a: __m128d) -> i32 {
a.move_mask() as i32
}
#[inline]
pub fn _mm_mul_epu32(a: __m128i, b: __m128i) -> __m128i {
a.as_u32x4().low_mul(b.as_u32x4()).as_i64x2()
}
#[inline]
pub fn _mm_mul_pd(a: __m128d, b: __m128d) -> __m128d {
a * b
}
#[inline]
pub fn _mm_mul_sd(a: __m128d, b: __m128d) -> __m128d {
a.replace(0, a.extract(0) * b.extract(0))
}
#[inline]
pub fn _mm_mulhi_epi16(a: __m128i, b: __m128i) -> __m128i {
a.as_i16x8().mulhi(b.as_i16x8()).as_i64x2()
}
#[inline]
pub fn _mm_mulhi_epu16(a: __m128i, b: __m128i) -> __m128i {
a.as_u16x8().mulhi(b.as_u16x8()).as_i64x2()
}
#[inline]
pub fn _mm_mullo_epi16(a: __m128i, b: __m128i) -> __m128i {
(a.as_i16x8() * b.as_i16x8()).as_i64x2()
}
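// `mullo` keeps the low 16 bits of each 16x16-bit product and `mulhi` the
// high 16 bits, so together they reconstruct the full 32-bit product. A
// test-only sketch:
#[cfg(test)]
mod mulhi_mullo_sketch {
    use super::*;

    #[test]
    fn full_product_from_hi_and_lo() {
        let a = _mm_set1_epi16(1000);
        let lo = _mm_mullo_epi16(a, a).as_i16x8().extract(0) as u16 as u32;
        let hi = _mm_mulhi_epi16(a, a).as_i16x8().extract(0) as u16 as u32;
        assert_eq!((hi << 16) | lo, 1_000_000);
    }
}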
#[inline]
pub fn _mm_or_pd(a: __m128d, b: __m128d) -> __m128d {
(a.as_i64x2() | b.as_i64x2()).as_f64x2()
}
#[inline]
pub fn _mm_or_si128(a: __m128i, b: __m128i) -> __m128i {
a | b
}
#[inline]
pub fn _mm_packs_epi16(a: __m128i, b: __m128i) -> __m128i {
a.as_i16x8().packs(b.as_i16x8()).as_i64x2()
}
#[inline]
pub fn _mm_packs_epi32(a: __m128i, b: __m128i) -> __m128i {
a.as_i32x4().packs(b.as_i32x4()).as_i64x2()
}
#[inline]
pub fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i {
a.as_i16x8().packus(b.as_i16x8()).as_i64x2()
}
#[inline]
pub fn _mm_pause() {
unsafe { sse2_pause() }
}
#[inline]
pub fn _mm_sad_epu8(a: __m128i, b: __m128i) -> __m128i {
a.as_u8x16().sad(b.as_u8x16()).as_i64x2()
}
#[inline]
pub fn _mm_set_epi16(e7: i16, e6: i16, e5: i16, e4: i16, e3: i16, e2: i16, e1: i16, e0: i16) -> __m128i {
i16x8::new(e0, e1, e2, e3, e4, e5, e6, e7).as_i64x2()
}
#[inline]
pub fn _mm_set_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
i32x4::new(e0, e1, e2, e3).as_i64x2()
}
#[inline]
pub fn _mm_set_epi64x(e1: i64, e0: i64) -> __m128i {
__m128i::new(e0, e1)
}
#[inline]
pub fn _mm_set_epi8(e15: i8, e14: i8, e13: i8, e12: i8, e11: i8, e10: i8, e9: i8, e8: i8, e7: i8, e6: i8, e5: i8, e4: i8, e3: i8, e2: i8, e1: i8, e0: i8) -> __m128i {
i8x16::new(e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15).as_i64x2()
}
#[inline]
pub fn _mm_set_pd(e1: f64, e0: f64) -> __m128d {
__m128d::new(e0, e1)
}
#[inline]
pub fn _mm_set_pd1(a: f64) -> __m128d {
__m128d::new(a, a)
}
#[inline]
pub fn _mm_set_sd(a: f64) -> __m128d {
__m128d::new(a, 0.)
}
#[inline]
pub fn _mm_set1_epi16(a: i16) -> __m128i {
i16x8::splat(a).as_i64x2()
}
#[inline]
pub fn _mm_set1_epi32(a: i32) -> __m128i {
i32x4::splat(a).as_i64x2()
}
#[inline]
pub fn _mm_set1_epi64x(a: i64) -> __m128i {
__m128i::splat(a)
}
#[inline]
pub fn _mm_set1_epi8(a: i8) -> __m128i {
i8x16::splat(a).as_i64x2()
}
#[inline]
pub fn _mm_set1_pd(a: f64) -> __m128d {
__m128d::splat(a)
}
#[inline]
pub fn _mm_setr_epi16(e7: i16, e6: i16, e5: i16, e4: i16, e3: i16, e2: i16, e1: i16, e0: i16) -> __m128i {
i16x8::new(e7, e6, e5, e4, e3, e2, e1, e0).as_i64x2()
}
#[inline]
pub fn _mm_setr_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
i32x4::new(e3, e2, e1, e0).as_i64x2()
}
#[inline]
pub fn _mm_setr_epi8(e15: i8, e14: i8, e13: i8, e12: i8, e11: i8, e10: i8, e9: i8, e8: i8, e7: i8, e6: i8, e5: i8, e4: i8, e3: i8, e2: i8, e1: i8, e0: i8) -> __m128i {
i8x16::new(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0).as_i64x2()
}
#[inline]
pub fn _mm_setr_pd(e1: f64, e0: f64) -> __m128d {
__m128d::new(e1, e0)
}
#[inline]
pub fn _mm_setzero_pd() -> __m128d {
__m128d::splat(0.)
}
#[inline]
pub fn _mm_setzero_si128() -> __m128i {
__m128i::splat(0)
}
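// `set` takes its arguments highest lane first and `setr` lowest lane
// first, so these two calls build the same vector. A test-only sketch:
#[cfg(test)]
mod set_versus_setr_sketch {
    use super::*;

    #[test]
    fn set_and_setr_agree() {
        let a = _mm_set_epi32(3, 2, 1, 0);
        let b = _mm_setr_epi32(0, 1, 2, 3);
        // Equal lanes compare to all ones, so every byte's mask bit is set.
        assert_eq!(_mm_movemask_epi8(_mm_cmpeq_epi32(a, b)), 0xFFFF);
    }
}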
#[inline]
pub fn _mm_shuffle_epi32(a: __m128i, imm8: i32) -> __m128i {
macro_rules! shuffle {
($a:expr, $b:expr, $c:expr, $d:expr) => {
unsafe {
simd_shuffle4(a.as_i32x4(), a.as_i32x4(), [$a, $b, $c, $d])
}
}
}
macro_rules! shuffle1 {
($a:expr, $b: expr, $c: expr) => {
match (imm8 >> 6) & 3 {
0 => shuffle!($a, $b, $c, 0),
1 => shuffle!($a, $b, $c, 1),
2 => shuffle!($a, $b, $c, 2),
_ => shuffle!($a, $b, $c, 3),
}
}
}
macro_rules! shuffle2 {
($a:expr, $b:expr) => {
match (imm8 >> 4) & 3 {
0 => shuffle1!($a, $b, 0),
1 => shuffle1!($a, $b, 1),
2 => shuffle1!($a, $b, 2),
_ => shuffle1!($a, $b, 3),
}
}
}
macro_rules! shuffle3 {
($a:expr) => {
match (imm8 >> 2) & 3 {
0 => shuffle2!($a, 0),
1 => shuffle2!($a, 1),
2 => shuffle2!($a, 2),
_ => shuffle2!($a, 3),
}
}
}
macro_rules! shuffle4 {
() => {
match (imm8 >> 0) & 3 {
0 => shuffle3!(0),
1 => shuffle3!(1),
2 => shuffle3!(2),
_ => shuffle3!(3),
}
}
}
let r: i32x4 = shuffle4!();
r.as_i64x2()
}
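// The immediate packs four 2-bit source-lane selectors, lowest destination
// lane in the lowest bits; 0b00_01_10_11 (0x1B) therefore reverses the four
// lanes. A test-only sketch:
#[cfg(test)]
mod shuffle_epi32_sketch {
    use super::*;

    #[test]
    fn imm_0x1b_reverses_lanes() {
        let v = _mm_set_epi32(3, 2, 1, 0); // lane i holds the value i
        let r = _mm_shuffle_epi32(v, 0x1B);
        // Lane 0 of the result selects old lane 3.
        assert_eq!(_mm_cvtsi128_si32(r), 3);
    }
}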
#[inline]
pub fn _mm_shuffle_pd(a: __m128d, b: __m128d, imm8: i32) -> __m128d {
unsafe {
match imm8 {
0 => simd_shuffle2(a, b, [0, 2]),
1 => simd_shuffle2(a, b, [1, 2]),
2 => simd_shuffle2(a, b, [0, 3]),
_ => simd_shuffle2(a, b, [1, 3]),
}
}
}
#[inline]
pub fn _mm_shufflehi_epi16(a: __m128i, imm8: i32) -> __m128i {
macro_rules! shuffle {
($a:expr, $b:expr, $c:expr, $d:expr) => {
unsafe {
simd_shuffle8(a.as_i16x8(), a.as_i16x8(), [0, 1, 2, 3, $a+4, $b+4, $c+4, $d+4])
}
}
}
macro_rules! shuffle1 {
($a:expr, $b: expr, $c: expr) => {
match (imm8 >> 6) & 3 {
0 => shuffle!($a, $b, $c, 0),
1 => shuffle!($a, $b, $c, 1),
2 => shuffle!($a, $b, $c, 2),
_ => shuffle!($a, $b, $c, 3),
}
}
}
macro_rules! shuffle2 {
($a:expr, $b:expr) => {
match (imm8 >> 4) & 3 {
0 => shuffle1!($a, $b, 0),
1 => shuffle1!($a, $b, 1),
2 => shuffle1!($a, $b, 2),
_ => shuffle1!($a, $b, 3),
}
}
}
macro_rules! shuffle3 {
($a:expr) => {
match (imm8 >> 2) & 3 {
0 => shuffle2!($a, 0),
1 => shuffle2!($a, 1),
2 => shuffle2!($a, 2),
_ => shuffle2!($a, 3),
}
}
}
macro_rules! shuffle4 {
() => {
match (imm8 >> 0) & 3 {
0 => shuffle3!(0),
1 => shuffle3!(1),
2 => shuffle3!(2),
_ => shuffle3!(3),
}
}
}
let r: i16x8 = shuffle4!();
r.as_i64x2()
}
#[inline]
pub fn _mm_shufflelo_epi16(a: __m128i, imm8: i32) -> __m128i {
macro_rules! shuffle {
($a:expr, $b:expr, $c:expr, $d:expr) => {
unsafe {
simd_shuffle8(a.as_i16x8(), a.as_i16x8(), [$a, $b, $c, $d, 4, 5, 6, 7])
}
}
}
macro_rules! shuffle1 {
($a:expr, $b: expr, $c: expr) => {
match (imm8 >> 6) & 3 {
0 => shuffle!($a, $b, $c, 0),
1 => shuffle!($a, $b, $c, 1),
2 => shuffle!($a, $b, $c, 2),
_ => shuffle!($a, $b, $c, 3),
}
}
}
macro_rules! shuffle2 {
($a:expr, $b:expr) => {
match (imm8 >> 4) & 3 {
0 => shuffle1!($a, $b, 0),
1 => shuffle1!($a, $b, 1),
2 => shuffle1!($a, $b, 2),
_ => shuffle1!($a, $b, 3),
}
}
}
macro_rules! shuffle3 {
($a:expr) => {
match (imm8 >> 2) & 3 {
0 => shuffle2!($a, 0),
1 => shuffle2!($a, 1),
2 => shuffle2!($a, 2),
_ => shuffle2!($a, 3),
}
}
}
macro_rules! shuffle4 {
() => {
match (imm8 >> 0) & 3 {
0 => shuffle3!(0),
1 => shuffle3!(1),
2 => shuffle3!(2),
_ => shuffle3!(3),
}
}
}
let r: i16x8 = shuffle4!();
r.as_i64x2()
}
#[inline]
pub fn _mm_sll_epi16(a: __m128i, count: __m128i) -> __m128i {
unsafe { sse2_psll_w(a.as_i16x8(), count.as_i16x8()).as_i64x2() }
}
#[inline]
pub fn _mm_sll_epi32(a: __m128i, count: __m128i) -> __m128i {
unsafe { sse2_psll_d(a.as_i32x4(), count.as_i32x4()).as_i64x2() }
}
#[inline]
pub fn _mm_sll_epi64(a: __m128i, count: __m128i) -> __m128i {
unsafe { sse2_psll_q(a, count) }
}
#[inline]
pub fn _mm_slli_epi16(a: __m128i, imm8: i32) -> __m128i {
unsafe { sse2_pslli_w(a.as_i16x8(), imm8).as_i64x2() }
}
#[inline]
pub fn _mm_slli_epi32(a: __m128i, imm8: i32) -> __m128i {
unsafe { sse2_pslli_d(a.as_i32x4(), imm8).as_i64x2() }
}
#[inline]
pub fn _mm_slli_epi64(a: __m128i, imm8: i32) -> __m128i {
unsafe { sse2_pslli_q(a, imm8) }
}
#[inline]
pub fn _mm_slli_si128(a: __m128i, imm8: i32) -> __m128i {
let a = a.as_i8x16();
let zero = i8x16::splat(0);
macro_rules! slli_shift {
($n:expr) => {
simd_shuffle16(zero, a, [16 - $n, 17 - $n, 18 - $n, 19 - $n,
20 - $n, 21 - $n, 22 - $n, 23 - $n,
24 - $n, 25 - $n, 26 - $n, 27 - $n,
28 - $n, 29 - $n, 30 - $n, 31 - $n])
}
}
let r: i8x16 = unsafe { match imm8 {
0 => slli_shift!(0),
1 => slli_shift!(1),
2 => slli_shift!(2),
3 => slli_shift!(3),
4 => slli_shift!(4),
5 => slli_shift!(5),
6 => slli_shift!(6),
7 => slli_shift!(7),
8 => slli_shift!(8),
9 => slli_shift!(9),
10 => slli_shift!(10),
11 => slli_shift!(11),
12 => slli_shift!(12),
13 => slli_shift!(13),
14 => slli_shift!(14),
15 => slli_shift!(15),
_ => slli_shift!(16),
}};
r.as_i64x2()
}
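// `slli_si128` shifts the whole register left by whole bytes, so a shift by
// 4 moves 32-bit lane 0 into lane 1 and zero-fills lane 0. Sketch:
#[cfg(test)]
mod slli_si128_sketch {
    use super::*;

    #[test]
    fn shift_left_by_four_bytes() {
        let v = _mm_cvtsi32_si128(0x11223344);
        let r = _mm_slli_si128(v, 4).as_i32x4();
        assert_eq!(r.extract(0), 0);
        assert_eq!(r.extract(1), 0x11223344);
    }
}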
#[inline]
pub fn _mm_sqrt_pd(a: __m128d) -> __m128d {
a.sqrt()
}
#[inline]
pub fn _mm_sqrt_sd(a: __m128d, b: __m128d) -> __m128d {
// sqrtsd: lane 0 is sqrt(b[0]); lane 1 is carried over from a.
let r = unsafe { sse2_sqrt_sd(b) };
__m128d::new(r.extract(0), a.extract(1))
}
#[inline]
pub fn _mm_sra_epi16(a: __m128i, count: __m128i) -> __m128i {
unsafe { sse2_psra_w(a.as_i16x8(), count.as_i16x8()).as_i64x2() }
}
#[inline]
pub fn _mm_sra_epi32(a: __m128i, count: __m128i) -> __m128i {
unsafe { sse2_psra_d(a.as_i32x4(), count.as_i32x4()).as_i64x2() }
}
#[inline]
pub fn _mm_srai_epi16(a: __m128i, imm8: i32) -> __m128i {
unsafe { sse2_psrai_w(a.as_i16x8(), imm8).as_i64x2() }
}
#[inline]
pub fn _mm_srai_epi32(a: __m128i, imm8: i32) -> __m128i {
unsafe { sse2_psrai_d(a.as_i32x4(), imm8).as_i64x2() }
}
#[inline]
pub fn _mm_srl_epi16(a: __m128i, count: __m128i) -> __m128i {
unsafe { sse2_psrl_w(a.as_i16x8(), count.as_i16x8()).as_i64x2() }
}
#[inline]
pub fn _mm_srl_epi32(a: __m128i, count: __m128i) -> __m128i {
unsafe { sse2_psrl_d(a.as_i32x4(), count.as_i32x4()).as_i64x2() }
}
#[inline]
pub fn _mm_srl_epi64(a: __m128i, count: __m128i) -> __m128i {
unsafe { sse2_psrl_q(a, count) }
}
#[inline]
pub fn _mm_srli_epi16(a: __m128i, imm8: i32) -> __m128i {
unsafe { sse2_psrli_w(a.as_i16x8(), imm8).as_i64x2() }
}
#[inline]
pub fn _mm_srli_epi32(a: __m128i, imm8: i32) -> __m128i {
unsafe { sse2_psrli_d(a.as_i32x4(), imm8).as_i64x2() }
}
#[inline]
pub fn _mm_srli_epi64(a: __m128i, imm8: i32) -> __m128i {
unsafe { sse2_psrli_q(a, imm8) }
}
#[inline]
pub fn _mm_srli_si128(a: __m128i, imm8: i32) -> __m128i {
let a = a.as_i8x16();
let zero = i8x16::splat(0);
macro_rules! srli_shift {
($n:expr) => {
simd_shuffle16(a, zero, [$n + 0, $n + 1, $n + 2, $n + 3,
$n + 4, $n + 5, $n + 6, $n + 7,
$n + 8, $n + 9, $n + 10, $n + 11,
$n + 12, $n + 13, $n + 14, $n + 15])
}
}
let r: i8x16 = unsafe { match imm8 {
0 => srli_shift!(0),
1 => srli_shift!(1),
2 => srli_shift!(2),
3 => srli_shift!(3),
4 => srli_shift!(4),
5 => srli_shift!(5),
6 => srli_shift!(6),
7 => srli_shift!(7),
8 => srli_shift!(8),
9 => srli_shift!(9),
10 => srli_shift!(10),
11 => srli_shift!(11),
12 => srli_shift!(12),
13 => srli_shift!(13),
14 => srli_shift!(14),
15 => srli_shift!(15),
_ => srli_shift!(16),
}};
r.as_i64x2()
}
#[inline]
pub unsafe fn _mm_store_pd(mem_addr: *mut f64, a: __m128d) {
let mem_addr = mem_addr as *mut __m128d;
*mem_addr = a;
}
#[inline]
pub unsafe fn _mm_store_pd1(mem_addr: *mut f64, a: __m128d) {
_mm_store1_pd(mem_addr, a)
}
#[inline]
pub unsafe fn _mm_store_sd(mem_addr: *mut f64, a: __m128d) {
let mem_addr = mem_addr as *mut u8;
let pa = &a as *const __m128d as *const u8;
copy_nonoverlapping(pa, mem_addr, 8)
}
#[inline]
pub unsafe fn _mm_store_si128(mem_addr: *mut __m128i, a: __m128i) {
*mem_addr = a;
}
#[inline]
pub unsafe fn _mm_store1_pd(mem_addr: *mut f64, a: __m128d) {
_mm_store_pd(mem_addr, __m128d::new(a.extract(0), a.extract(0)))
}
#[inline]
pub unsafe fn _mm_storeh_pd(mem_addr: *mut f64, a: __m128d) {
let mem_addr = mem_addr as *mut u8;
let pa = &a as *const __m128d as *const u8;
// storeh writes the high lane, which lives at byte offset 8.
copy_nonoverlapping(pa.offset(8), mem_addr, 8)
}
#[inline]
pub unsafe fn _mm_storel_epi64(mem_addr: *mut __m128i, a: __m128i) {
let mem_addr = mem_addr as *mut u8;
let pa = &a as *const __m128i as *const u8;
copy_nonoverlapping(pa, mem_addr, 8)
}
#[inline]
pub unsafe fn _mm_storel_pd(mem_addr: *mut f64, a: __m128d) {
let mem_addr = mem_addr as *mut u8;
let pa = &a as *const __m128d as *const u8;
copy_nonoverlapping(pa, mem_addr, 8)
}
#[inline]
pub unsafe fn _mm_storer_pd(mem_addr: *mut f64, a: __m128d) {
_mm_store_pd(mem_addr, __m128d::new(a.extract(1), a.extract(0)));
}
#[inline]
pub unsafe fn _mm_storeu_pd(mem_addr: *mut f64, a: __m128d) {
let mem_addr = mem_addr as *mut u8;
let pa = &a as *const __m128d as *const u8;
copy_nonoverlapping(pa, mem_addr, 16)
}
#[inline]
pub unsafe fn _mm_storeu_si128(mem_addr: *mut __m128i, a: __m128i) {
let mem_addr = mem_addr as *mut u8;
let pa = &a as *const __m128i as *const u8;
copy_nonoverlapping(pa, mem_addr, 16)
}
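// Like the loads, the `storeu` stores tolerate unaligned destinations, so a
// store/reload through a plain array round-trips. A test-only sketch:
#[cfg(test)]
mod storeu_sketch {
    use super::*;

    #[test]
    fn store_then_read_back() {
        let mut buf = [0.0f64; 2];
        let v = _mm_set_pd(2.0, 1.0); // lane 0 = 1.0, lane 1 = 2.0
        unsafe { _mm_storeu_pd(buf.as_mut_ptr(), v) };
        assert_eq!(buf, [1.0, 2.0]);
    }
}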
#[inline]
pub fn _mm_sub_epi16(a: __m128i, b: __m128i) -> __m128i {
(a.as_i16x8() - b.as_i16x8()).as_i64x2()
}
#[inline]
pub fn _mm_sub_epi32(a: __m128i, b: __m128i) -> __m128i {
(a.as_i32x4() - b.as_i32x4()).as_i64x2()
}
#[inline]
pub fn _mm_sub_epi64(a: __m128i, b: __m128i) -> __m128i {
a - b
}
#[inline]
pub fn _mm_sub_epi8(a: __m128i, b: __m128i) -> __m128i {
(a.as_i8x16() - b.as_i8x16()).as_i64x2()
}
#[inline]
pub fn _mm_sub_pd(a: __m128d, b: __m128d) -> __m128d {
a - b
}
#[inline]
pub fn _mm_sub_sd(a: __m128d, b: __m128d) -> __m128d {
a.replace(0, a.extract(0) - b.extract(0))
}
#[inline]
pub fn _mm_subs_epi16(a: __m128i, b: __m128i) -> __m128i {
a.as_i16x8().subs(b.as_i16x8()).as_i64x2()
}
#[inline]
pub fn _mm_subs_epi8(a: __m128i, b: __m128i) -> __m128i {
a.as_i8x16().subs(b.as_i8x16()).as_i64x2()
}
#[inline]
pub fn _mm_subs_epu16(a: __m128i, b: __m128i) -> __m128i {
a.as_u16x8().subs(b.as_u16x8()).as_i64x2()
}
#[inline]
pub fn _mm_subs_epu8(a: __m128i, b: __m128i) -> __m128i {
a.as_u8x16().subs(b.as_u8x16()).as_i64x2()
}
#[inline]
pub fn _mm_ucomieq_sd(a: __m128d, b: __m128d) -> i32 {
unsafe { sse2_ucomieq_sd(a, b) }
}
#[inline]
pub fn _mm_ucomige_sd(a: __m128d, b: __m128d) -> i32 {
unsafe { sse2_ucomige_sd(a, b) }
}
#[inline]
pub fn _mm_ucomigt_sd(a: __m128d, b: __m128d) -> i32 {
unsafe { sse2_ucomigt_sd(a, b) }
}
#[inline]
pub fn _mm_ucomile_sd(a: __m128d, b: __m128d) -> i32 {
unsafe { sse2_ucomile_sd(a, b) }
}
#[inline]
pub fn _mm_ucomilt_sd(a: __m128d, b: __m128d) -> i32 {
unsafe { sse2_ucomilt_sd(a, b) }
}
#[inline]
pub fn _mm_ucomineq_sd(a: __m128d, b: __m128d) -> i32 {
unsafe { sse2_ucomineq_sd(a, b) }
}
#[inline]
pub fn _mm_unpackhi_epi16(a: __m128i, b: __m128i) -> __m128i {
let a = a.as_i16x8();
let b = b.as_i16x8();
let r: i16x8 = unsafe { simd_shuffle8(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) };
r.as_i64x2()
}
#[inline]
pub fn _mm_unpackhi_epi32(a: __m128i, b: __m128i) -> __m128i {
let r: i32x4 = unsafe { simd_shuffle4(a.as_i32x4(), b.as_i32x4(), [2, 6, 3, 7]) };
r.as_i64x2()
}
#[inline]
pub fn _mm_unpackhi_epi64(a: __m128i, b: __m128i) -> __m128i {
unsafe { simd_shuffle2(a, b, [1, 3]) }
}
#[inline]
pub fn _mm_unpackhi_epi8(a: __m128i, b: __m128i) -> __m128i {
let a = a.as_i8x16();
let b = b.as_i8x16();
let r: i8x16 = unsafe { simd_shuffle16(a, b, [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]) };
r.as_i64x2()
}