use crate::{Level, arch_types::ArchTypes, prelude::*, seal::Seal};
use crate::{
f32x4, f32x8, f32x16, f64x2, f64x4, f64x8, i8x16, i8x32, i8x64, i16x8, i16x16, i16x32, i32x4,
i32x8, i32x16, mask8x16, mask8x32, mask8x64, mask16x8, mask16x16, mask16x32, mask32x4,
mask32x8, mask32x16, mask64x2, mask64x4, mask64x8, u8x16, u8x32, u8x64, u16x8, u16x16, u16x32,
u32x4, u32x8, u32x16,
};
use core::ops::*;
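// When building with the `libm` feature and without `std`, the inherent float
// rounding methods (`floor`, `ceil`, `sqrt`, ...) are unavailable because they
// live in `std`, not `core`. This private shim supplies them via `libm` so the
// scalar fallback code below can call them uniformly. `round_ties_even` maps
// to `rint`, which rounds to nearest-even under the default rounding mode.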
#[cfg(all(feature = "libm", not(feature = "std")))]
trait FloatExt {
fn floor(self) -> Self;
fn ceil(self) -> Self;
fn round_ties_even(self) -> Self;
fn fract(self) -> Self;
fn sqrt(self) -> Self;
fn trunc(self) -> Self;
}
#[cfg(all(feature = "libm", not(feature = "std")))]
impl FloatExt for f32 {
#[inline(always)]
fn floor(self) -> f32 {
libm::floorf(self)
}
#[inline(always)]
fn ceil(self) -> f32 {
libm::ceilf(self)
}
#[inline(always)]
fn round_ties_even(self) -> f32 {
libm::rintf(self)
}
#[inline(always)]
fn sqrt(self) -> f32 {
libm::sqrtf(self)
}
#[inline(always)]
fn fract(self) -> f32 {
self - self.trunc()
}
#[inline(always)]
fn trunc(self) -> f32 {
libm::truncf(self)
}
}
#[cfg(all(feature = "libm", not(feature = "std")))]
impl FloatExt for f64 {
#[inline(always)]
fn floor(self) -> f64 {
libm::floor(self)
}
#[inline(always)]
fn ceil(self) -> f64 {
libm::ceil(self)
}
#[inline(always)]
fn round_ties_even(self) -> f64 {
libm::rint(self)
}
#[inline(always)]
fn sqrt(self) -> f64 {
libm::sqrt(self)
}
#[inline(always)]
fn fract(self) -> f64 {
self - self.trunc()
}
#[inline(always)]
fn trunc(self) -> f64 {
libm::trunc(self)
}
}
#[doc = "The SIMD token for the \"fallback\" level."]
#[derive(Clone, Copy, Debug)]
pub struct Fallback {
pub fallback: crate::core_arch::fallback::Fallback,
}
impl Fallback {
#[inline]
pub const fn new() -> Self {
Self {
fallback: crate::core_arch::fallback::Fallback::new(),
}
}
}
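// A minimal usage sketch, using methods defined later in this file:
//
//     let simd = Fallback::new();
//     let v = simd.splat_f32x4(1.0);
//     let sum = simd.add_f32x4(v, v); // every lane is 2.0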
impl Seal for Fallback {}
impl ArchTypes for Fallback {
type f32x4 = crate::support::Aligned128<[f32; 4usize]>;
type i8x16 = crate::support::Aligned128<[i8; 16usize]>;
type u8x16 = crate::support::Aligned128<[u8; 16usize]>;
type mask8x16 = crate::support::Aligned128<[i8; 16usize]>;
type i16x8 = crate::support::Aligned128<[i16; 8usize]>;
type u16x8 = crate::support::Aligned128<[u16; 8usize]>;
type mask16x8 = crate::support::Aligned128<[i16; 8usize]>;
type i32x4 = crate::support::Aligned128<[i32; 4usize]>;
type u32x4 = crate::support::Aligned128<[u32; 4usize]>;
type mask32x4 = crate::support::Aligned128<[i32; 4usize]>;
type f64x2 = crate::support::Aligned128<[f64; 2usize]>;
type mask64x2 = crate::support::Aligned128<[i64; 2usize]>;
type f32x8 = crate::support::Aligned256<[f32; 8usize]>;
type i8x32 = crate::support::Aligned256<[i8; 32usize]>;
type u8x32 = crate::support::Aligned256<[u8; 32usize]>;
type mask8x32 = crate::support::Aligned256<[i8; 32usize]>;
type i16x16 = crate::support::Aligned256<[i16; 16usize]>;
type u16x16 = crate::support::Aligned256<[u16; 16usize]>;
type mask16x16 = crate::support::Aligned256<[i16; 16usize]>;
type i32x8 = crate::support::Aligned256<[i32; 8usize]>;
type u32x8 = crate::support::Aligned256<[u32; 8usize]>;
type mask32x8 = crate::support::Aligned256<[i32; 8usize]>;
type f64x4 = crate::support::Aligned256<[f64; 4usize]>;
type mask64x4 = crate::support::Aligned256<[i64; 4usize]>;
type f32x16 = crate::support::Aligned512<[f32; 16usize]>;
type i8x64 = crate::support::Aligned512<[i8; 64usize]>;
type u8x64 = crate::support::Aligned512<[u8; 64usize]>;
type mask8x64 = crate::support::Aligned512<[i8; 64usize]>;
type i16x32 = crate::support::Aligned512<[i16; 32usize]>;
type u16x32 = crate::support::Aligned512<[u16; 32usize]>;
type mask16x32 = crate::support::Aligned512<[i16; 32usize]>;
type i32x16 = crate::support::Aligned512<[i32; 16usize]>;
type u32x16 = crate::support::Aligned512<[u32; 16usize]>;
type mask32x16 = crate::support::Aligned512<[i32; 16usize]>;
type f64x8 = crate::support::Aligned512<[f64; 8usize]>;
type mask64x8 = crate::support::Aligned512<[i64; 8usize]>;
}
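// Every fallback vector type is a plain array behind an alignment wrapper
// (`Aligned128`/`Aligned256`/`Aligned512`, matching 128/256/512-bit register
// widths). All lane operations below are therefore ordinary scalar loops, and
// bitcasts between same-width types are free.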
impl Simd for Fallback {
type f32s = f32x4<Self>;
type f64s = f64x2<Self>;
type u8s = u8x16<Self>;
type i8s = i8x16<Self>;
type u16s = u16x8<Self>;
type i16s = i16x8<Self>;
type u32s = u32x4<Self>;
type i32s = i32x4<Self>;
type mask8s = mask8x16<Self>;
type mask16s = mask16x8<Self>;
type mask32s = mask32x4<Self>;
type mask64s = mask64x2<Self>;
#[inline(always)]
fn level(self) -> Level {
#[cfg(feature = "force_support_fallback")]
return Level::Fallback(self);
#[cfg(not(feature = "force_support_fallback"))]
Level::baseline()
}
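// With no target features to enable at this level, `vectorize` is a plain
// call. The inner function mirrors the shape used by the real SIMD levels,
// where it would typically carry a `#[target_feature]` attribute.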
#[inline]
fn vectorize<F: FnOnce() -> R, R>(self, f: F) -> R {
fn vectorize_inner<F: FnOnce() -> R, R>(f: F) -> R {
f()
}
vectorize_inner(f)
}
#[inline(always)]
fn splat_f32x4(self, val: f32) -> f32x4<Self> {
[val; 4usize].simd_into(self)
}
#[inline(always)]
fn load_array_f32x4(self, val: [f32; 4usize]) -> f32x4<Self> {
f32x4 {
val: crate::support::Aligned128(val),
simd: self,
}
}
#[inline(always)]
fn load_array_ref_f32x4(self, val: &[f32; 4usize]) -> f32x4<Self> {
f32x4 {
val: crate::support::Aligned128(*val),
simd: self,
}
}
#[inline(always)]
fn as_array_f32x4(self, a: f32x4<Self>) -> [f32; 4usize] {
a.val.0
}
#[inline(always)]
fn as_array_ref_f32x4(self, a: &f32x4<Self>) -> &[f32; 4usize] {
&a.val.0
}
#[inline(always)]
fn as_array_mut_f32x4(self, a: &mut f32x4<Self>) -> &mut [f32; 4usize] {
&mut a.val.0
}
#[inline(always)]
fn store_array_f32x4(self, a: f32x4<Self>, dest: &mut [f32; 4usize]) {
*dest = a.val.0;
}
#[inline(always)]
fn cvt_from_bytes_f32x4(self, a: u8x16<Self>) -> f32x4<Self> {
unsafe {
f32x4 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_f32x4(self, a: f32x4<Self>) -> u8x16<Self> {
unsafe {
u8x16 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
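// `slide` shifts `a` down by SHIFT lanes and fills the vacated high lanes from
// the low lanes of `b`; equivalently, it extracts lanes SHIFT.. of the virtual
// concatenation [a, b]. A SHIFT larger than the lane count is out of range for
// the slice indexing below.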
#[inline(always)]
fn slide_f32x4<const SHIFT: usize>(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
let mut dest = [Default::default(); 4usize];
dest[..4usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
dest[4usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
dest.simd_into(self)
}
#[inline(always)]
fn slide_within_blocks_f32x4<const SHIFT: usize>(
self,
a: f32x4<Self>,
b: f32x4<Self>,
) -> f32x4<Self> {
self.slide_f32x4::<SHIFT>(a, b)
}
#[inline(always)]
fn abs_f32x4(self, a: f32x4<Self>) -> f32x4<Self> {
[
f32::abs(a[0usize]),
f32::abs(a[1usize]),
f32::abs(a[2usize]),
f32::abs(a[3usize]),
]
.simd_into(self)
}
#[inline(always)]
fn neg_f32x4(self, a: f32x4<Self>) -> f32x4<Self> {
[
f32::neg(a[0usize]),
f32::neg(a[1usize]),
f32::neg(a[2usize]),
f32::neg(a[3usize]),
]
.simd_into(self)
}
#[inline(always)]
fn sqrt_f32x4(self, a: f32x4<Self>) -> f32x4<Self> {
[
f32::sqrt(a[0usize]),
f32::sqrt(a[1usize]),
f32::sqrt(a[2usize]),
f32::sqrt(a[3usize]),
]
.simd_into(self)
}
#[inline(always)]
fn add_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
[
f32::add(a[0usize], &b[0usize]),
f32::add(a[1usize], &b[1usize]),
f32::add(a[2usize], &b[2usize]),
f32::add(a[3usize], &b[3usize]),
]
.simd_into(self)
}
#[inline(always)]
fn sub_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
[
f32::sub(a[0usize], &b[0usize]),
f32::sub(a[1usize], &b[1usize]),
f32::sub(a[2usize], &b[2usize]),
f32::sub(a[3usize], &b[3usize]),
]
.simd_into(self)
}
#[inline(always)]
fn mul_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
[
f32::mul(a[0usize], &b[0usize]),
f32::mul(a[1usize], &b[1usize]),
f32::mul(a[2usize], &b[2usize]),
f32::mul(a[3usize], &b[3usize]),
]
.simd_into(self)
}
#[inline(always)]
fn div_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
[
f32::div(a[0usize], &b[0usize]),
f32::div(a[1usize], &b[1usize]),
f32::div(a[2usize], &b[2usize]),
f32::div(a[3usize], &b[3usize]),
]
.simd_into(self)
}
#[inline(always)]
fn copysign_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
[
f32::copysign(a[0usize], b[0usize]),
f32::copysign(a[1usize], b[1usize]),
f32::copysign(a[2usize], b[2usize]),
f32::copysign(a[3usize], b[3usize]),
]
.simd_into(self)
}
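// Mask lanes follow the usual hardware convention: 0 for false, all bits set
// (-1) for true. `-(cond as i32)` produces exactly those two values, which is
// why the comparisons below negate the boolean cast.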
#[inline(always)]
fn simd_eq_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> mask32x4<Self> {
[
-(f32::eq(&a[0usize], &b[0usize]) as i32),
-(f32::eq(&a[1usize], &b[1usize]) as i32),
-(f32::eq(&a[2usize], &b[2usize]) as i32),
-(f32::eq(&a[3usize], &b[3usize]) as i32),
]
.simd_into(self)
}
#[inline(always)]
fn simd_lt_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> mask32x4<Self> {
[
-(f32::lt(&a[0usize], &b[0usize]) as i32),
-(f32::lt(&a[1usize], &b[1usize]) as i32),
-(f32::lt(&a[2usize], &b[2usize]) as i32),
-(f32::lt(&a[3usize], &b[3usize]) as i32),
]
.simd_into(self)
}
#[inline(always)]
fn simd_le_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> mask32x4<Self> {
[
-(f32::le(&a[0usize], &b[0usize]) as i32),
-(f32::le(&a[1usize], &b[1usize]) as i32),
-(f32::le(&a[2usize], &b[2usize]) as i32),
-(f32::le(&a[3usize], &b[3usize]) as i32),
]
.simd_into(self)
}
#[inline(always)]
fn simd_ge_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> mask32x4<Self> {
[
-(f32::ge(&a[0usize], &b[0usize]) as i32),
-(f32::ge(&a[1usize], &b[1usize]) as i32),
-(f32::ge(&a[2usize], &b[2usize]) as i32),
-(f32::ge(&a[3usize], &b[3usize]) as i32),
]
.simd_into(self)
}
#[inline(always)]
fn simd_gt_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> mask32x4<Self> {
[
-(f32::gt(&a[0usize], &b[0usize]) as i32),
-(f32::gt(&a[1usize], &b[1usize]) as i32),
-(f32::gt(&a[2usize], &b[2usize]) as i32),
-(f32::gt(&a[3usize], &b[3usize]) as i32),
]
.simd_into(self)
}
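// `zip` interleaves lanes from the low (or high) halves of `a` and `b`;
// `unzip` gathers the even (or odd) lanes of the concatenation [a, b].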
#[inline(always)]
fn zip_low_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
[a[0usize], b[0usize], a[1usize], b[1usize]].simd_into(self)
}
#[inline(always)]
fn zip_high_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
[a[2usize], b[2usize], a[3usize], b[3usize]].simd_into(self)
}
#[inline(always)]
fn unzip_low_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
[a[0usize], a[2usize], b[0usize], b[2usize]].simd_into(self)
}
#[inline(always)]
fn unzip_high_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
[a[1usize], a[3usize], b[1usize], b[3usize]].simd_into(self)
}
#[inline(always)]
fn max_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
[
f32::max(a[0usize], b[0usize]),
f32::max(a[1usize], b[1usize]),
f32::max(a[2usize], b[2usize]),
f32::max(a[3usize], b[3usize]),
]
.simd_into(self)
}
#[inline(always)]
fn min_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
[
f32::min(a[0usize], b[0usize]),
f32::min(a[1usize], b[1usize]),
f32::min(a[2usize], b[2usize]),
f32::min(a[3usize], b[3usize]),
]
.simd_into(self)
}
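// At this level the "precise" variants coincide with the plain ones:
// `f32::min`/`f32::max` already have IEEE-754 minNum/maxNum semantics
// (a NaN operand yields the other operand).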
#[inline(always)]
fn max_precise_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
[
f32::max(a[0usize], b[0usize]),
f32::max(a[1usize], b[1usize]),
f32::max(a[2usize], b[2usize]),
f32::max(a[3usize], b[3usize]),
]
.simd_into(self)
}
#[inline(always)]
fn min_precise_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
[
f32::min(a[0usize], b[0usize]),
f32::min(a[1usize], b[1usize]),
f32::min(a[2usize], b[2usize]),
f32::min(a[3usize], b[3usize]),
]
.simd_into(self)
}
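// Note: the fallback computes multiply-add as two separately rounded
// operations, so results can differ in the last bit from levels that lower to
// a true fused-multiply-add instruction.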
#[inline(always)]
fn mul_add_f32x4(self, a: f32x4<Self>, b: f32x4<Self>, c: f32x4<Self>) -> f32x4<Self> {
a.mul(b).add(c)
}
#[inline(always)]
fn mul_sub_f32x4(self, a: f32x4<Self>, b: f32x4<Self>, c: f32x4<Self>) -> f32x4<Self> {
a.mul(b).sub(c)
}
#[inline(always)]
fn floor_f32x4(self, a: f32x4<Self>) -> f32x4<Self> {
[
f32::floor(a[0usize]),
f32::floor(a[1usize]),
f32::floor(a[2usize]),
f32::floor(a[3usize]),
]
.simd_into(self)
}
#[inline(always)]
fn ceil_f32x4(self, a: f32x4<Self>) -> f32x4<Self> {
[
f32::ceil(a[0usize]),
f32::ceil(a[1usize]),
f32::ceil(a[2usize]),
f32::ceil(a[3usize]),
]
.simd_into(self)
}
#[inline(always)]
fn round_ties_even_f32x4(self, a: f32x4<Self>) -> f32x4<Self> {
[
f32::round_ties_even(a[0usize]),
f32::round_ties_even(a[1usize]),
f32::round_ties_even(a[2usize]),
f32::round_ties_even(a[3usize]),
]
.simd_into(self)
}
#[inline(always)]
fn fract_f32x4(self, a: f32x4<Self>) -> f32x4<Self> {
[
f32::fract(a[0usize]),
f32::fract(a[1usize]),
f32::fract(a[2usize]),
f32::fract(a[3usize]),
]
.simd_into(self)
}
#[inline(always)]
fn trunc_f32x4(self, a: f32x4<Self>) -> f32x4<Self> {
[
f32::trunc(a[0usize]),
f32::trunc(a[1usize]),
f32::trunc(a[2usize]),
f32::trunc(a[3usize]),
]
.simd_into(self)
}
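// `select` is a lane-wise conditional move: where the mask lane is nonzero
// (true) the lane comes from `b`, otherwise from `c`.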
#[inline(always)]
fn select_f32x4(self, a: mask32x4<Self>, b: f32x4<Self>, c: f32x4<Self>) -> f32x4<Self> {
[
if a[0usize] != 0 { b[0usize] } else { c[0usize] },
if a[1usize] != 0 { b[1usize] } else { c[1usize] },
if a[2usize] != 0 { b[2usize] } else { c[2usize] },
if a[3usize] != 0 { b[3usize] } else { c[3usize] },
]
.simd_into(self)
}
#[inline(always)]
fn combine_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x8<Self> {
let mut result = [0.0; 8usize];
result[0..4usize].copy_from_slice(&a.val.0);
result[4usize..8usize].copy_from_slice(&b.val.0);
result.simd_into(self)
}
#[inline(always)]
fn reinterpret_f64_f32x4(self, a: f32x4<Self>) -> f64x2<Self> {
a.bitcast()
}
#[inline(always)]
fn reinterpret_i32_f32x4(self, a: f32x4<Self>) -> i32x4<Self> {
a.bitcast()
}
#[inline(always)]
fn reinterpret_u8_f32x4(self, a: f32x4<Self>) -> u8x16<Self> {
a.bitcast()
}
#[inline(always)]
fn reinterpret_u32_f32x4(self, a: f32x4<Self>) -> u32x4<Self> {
a.bitcast()
}
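// Rust `as` casts from float to integer saturate at the target type's bounds
// and map NaN to 0, so these conversions are well-defined for every input.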
#[inline(always)]
fn cvt_u32_f32x4(self, a: f32x4<Self>) -> u32x4<Self> {
[
a[0usize] as u32,
a[1usize] as u32,
a[2usize] as u32,
a[3usize] as u32,
]
.simd_into(self)
}
#[inline(always)]
fn cvt_u32_precise_f32x4(self, a: f32x4<Self>) -> u32x4<Self> {
self.cvt_u32_f32x4(a)
}
#[inline(always)]
fn cvt_i32_f32x4(self, a: f32x4<Self>) -> i32x4<Self> {
[
a[0usize] as i32,
a[1usize] as i32,
a[2usize] as i32,
a[3usize] as i32,
]
.simd_into(self)
}
#[inline(always)]
fn cvt_i32_precise_f32x4(self, a: f32x4<Self>) -> i32x4<Self> {
self.cvt_i32_f32x4(a)
}
#[inline(always)]
fn splat_i8x16(self, val: i8) -> i8x16<Self> {
[val; 16usize].simd_into(self)
}
#[inline(always)]
fn load_array_i8x16(self, val: [i8; 16usize]) -> i8x16<Self> {
i8x16 {
val: crate::support::Aligned128(val),
simd: self,
}
}
#[inline(always)]
fn load_array_ref_i8x16(self, val: &[i8; 16usize]) -> i8x16<Self> {
i8x16 {
val: crate::support::Aligned128(*val),
simd: self,
}
}
#[inline(always)]
fn as_array_i8x16(self, a: i8x16<Self>) -> [i8; 16usize] {
a.val.0
}
#[inline(always)]
fn as_array_ref_i8x16(self, a: &i8x16<Self>) -> &[i8; 16usize] {
&a.val.0
}
#[inline(always)]
fn as_array_mut_i8x16(self, a: &mut i8x16<Self>) -> &mut [i8; 16usize] {
&mut a.val.0
}
#[inline(always)]
fn store_array_i8x16(self, a: i8x16<Self>, dest: &mut [i8; 16usize]) {
*dest = a.val.0;
}
#[inline(always)]
fn cvt_from_bytes_i8x16(self, a: u8x16<Self>) -> i8x16<Self> {
unsafe {
i8x16 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_i8x16(self, a: i8x16<Self>) -> u8x16<Self> {
unsafe {
u8x16 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn slide_i8x16<const SHIFT: usize>(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
let mut dest = [Default::default(); 16usize];
dest[..16usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
dest[16usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
dest.simd_into(self)
}
#[inline(always)]
fn slide_within_blocks_i8x16<const SHIFT: usize>(
self,
a: i8x16<Self>,
b: i8x16<Self>,
) -> i8x16<Self> {
self.slide_i8x16::<SHIFT>(a, b)
}
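// Integer lane arithmetic uses the `wrapping_*` operations to match the
// modular (two's-complement) semantics of hardware SIMD adds and multiplies.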
#[inline(always)]
fn add_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
[
i8::wrapping_add(a[0usize], b[0usize]),
i8::wrapping_add(a[1usize], b[1usize]),
i8::wrapping_add(a[2usize], b[2usize]),
i8::wrapping_add(a[3usize], b[3usize]),
i8::wrapping_add(a[4usize], b[4usize]),
i8::wrapping_add(a[5usize], b[5usize]),
i8::wrapping_add(a[6usize], b[6usize]),
i8::wrapping_add(a[7usize], b[7usize]),
i8::wrapping_add(a[8usize], b[8usize]),
i8::wrapping_add(a[9usize], b[9usize]),
i8::wrapping_add(a[10usize], b[10usize]),
i8::wrapping_add(a[11usize], b[11usize]),
i8::wrapping_add(a[12usize], b[12usize]),
i8::wrapping_add(a[13usize], b[13usize]),
i8::wrapping_add(a[14usize], b[14usize]),
i8::wrapping_add(a[15usize], b[15usize]),
]
.simd_into(self)
}
#[inline(always)]
fn sub_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
[
i8::wrapping_sub(a[0usize], b[0usize]),
i8::wrapping_sub(a[1usize], b[1usize]),
i8::wrapping_sub(a[2usize], b[2usize]),
i8::wrapping_sub(a[3usize], b[3usize]),
i8::wrapping_sub(a[4usize], b[4usize]),
i8::wrapping_sub(a[5usize], b[5usize]),
i8::wrapping_sub(a[6usize], b[6usize]),
i8::wrapping_sub(a[7usize], b[7usize]),
i8::wrapping_sub(a[8usize], b[8usize]),
i8::wrapping_sub(a[9usize], b[9usize]),
i8::wrapping_sub(a[10usize], b[10usize]),
i8::wrapping_sub(a[11usize], b[11usize]),
i8::wrapping_sub(a[12usize], b[12usize]),
i8::wrapping_sub(a[13usize], b[13usize]),
i8::wrapping_sub(a[14usize], b[14usize]),
i8::wrapping_sub(a[15usize], b[15usize]),
]
.simd_into(self)
}
#[inline(always)]
fn mul_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
[
i8::wrapping_mul(a[0usize], b[0usize]),
i8::wrapping_mul(a[1usize], b[1usize]),
i8::wrapping_mul(a[2usize], b[2usize]),
i8::wrapping_mul(a[3usize], b[3usize]),
i8::wrapping_mul(a[4usize], b[4usize]),
i8::wrapping_mul(a[5usize], b[5usize]),
i8::wrapping_mul(a[6usize], b[6usize]),
i8::wrapping_mul(a[7usize], b[7usize]),
i8::wrapping_mul(a[8usize], b[8usize]),
i8::wrapping_mul(a[9usize], b[9usize]),
i8::wrapping_mul(a[10usize], b[10usize]),
i8::wrapping_mul(a[11usize], b[11usize]),
i8::wrapping_mul(a[12usize], b[12usize]),
i8::wrapping_mul(a[13usize], b[13usize]),
i8::wrapping_mul(a[14usize], b[14usize]),
i8::wrapping_mul(a[15usize], b[15usize]),
]
.simd_into(self)
}
#[inline(always)]
fn and_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
[
i8::bitand(a[0usize], &b[0usize]),
i8::bitand(a[1usize], &b[1usize]),
i8::bitand(a[2usize], &b[2usize]),
i8::bitand(a[3usize], &b[3usize]),
i8::bitand(a[4usize], &b[4usize]),
i8::bitand(a[5usize], &b[5usize]),
i8::bitand(a[6usize], &b[6usize]),
i8::bitand(a[7usize], &b[7usize]),
i8::bitand(a[8usize], &b[8usize]),
i8::bitand(a[9usize], &b[9usize]),
i8::bitand(a[10usize], &b[10usize]),
i8::bitand(a[11usize], &b[11usize]),
i8::bitand(a[12usize], &b[12usize]),
i8::bitand(a[13usize], &b[13usize]),
i8::bitand(a[14usize], &b[14usize]),
i8::bitand(a[15usize], &b[15usize]),
]
.simd_into(self)
}
#[inline(always)]
fn or_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
[
i8::bitor(a[0usize], &b[0usize]),
i8::bitor(a[1usize], &b[1usize]),
i8::bitor(a[2usize], &b[2usize]),
i8::bitor(a[3usize], &b[3usize]),
i8::bitor(a[4usize], &b[4usize]),
i8::bitor(a[5usize], &b[5usize]),
i8::bitor(a[6usize], &b[6usize]),
i8::bitor(a[7usize], &b[7usize]),
i8::bitor(a[8usize], &b[8usize]),
i8::bitor(a[9usize], &b[9usize]),
i8::bitor(a[10usize], &b[10usize]),
i8::bitor(a[11usize], &b[11usize]),
i8::bitor(a[12usize], &b[12usize]),
i8::bitor(a[13usize], &b[13usize]),
i8::bitor(a[14usize], &b[14usize]),
i8::bitor(a[15usize], &b[15usize]),
]
.simd_into(self)
}
#[inline(always)]
fn xor_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
[
i8::bitxor(a[0usize], &b[0usize]),
i8::bitxor(a[1usize], &b[1usize]),
i8::bitxor(a[2usize], &b[2usize]),
i8::bitxor(a[3usize], &b[3usize]),
i8::bitxor(a[4usize], &b[4usize]),
i8::bitxor(a[5usize], &b[5usize]),
i8::bitxor(a[6usize], &b[6usize]),
i8::bitxor(a[7usize], &b[7usize]),
i8::bitxor(a[8usize], &b[8usize]),
i8::bitxor(a[9usize], &b[9usize]),
i8::bitxor(a[10usize], &b[10usize]),
i8::bitxor(a[11usize], &b[11usize]),
i8::bitxor(a[12usize], &b[12usize]),
i8::bitxor(a[13usize], &b[13usize]),
i8::bitxor(a[14usize], &b[14usize]),
i8::bitxor(a[15usize], &b[15usize]),
]
.simd_into(self)
}
#[inline(always)]
fn not_i8x16(self, a: i8x16<Self>) -> i8x16<Self> {
[
i8::not(a[0usize]),
i8::not(a[1usize]),
i8::not(a[2usize]),
i8::not(a[3usize]),
i8::not(a[4usize]),
i8::not(a[5usize]),
i8::not(a[6usize]),
i8::not(a[7usize]),
i8::not(a[8usize]),
i8::not(a[9usize]),
i8::not(a[10usize]),
i8::not(a[11usize]),
i8::not(a[12usize]),
i8::not(a[13usize]),
i8::not(a[14usize]),
i8::not(a[15usize]),
]
.simd_into(self)
}
#[inline(always)]
fn shl_i8x16(self, a: i8x16<Self>, shift: u32) -> i8x16<Self> {
[
i8::shl(a[0usize], shift),
i8::shl(a[1usize], shift),
i8::shl(a[2usize], shift),
i8::shl(a[3usize], shift),
i8::shl(a[4usize], shift),
i8::shl(a[5usize], shift),
i8::shl(a[6usize], shift),
i8::shl(a[7usize], shift),
i8::shl(a[8usize], shift),
i8::shl(a[9usize], shift),
i8::shl(a[10usize], shift),
i8::shl(a[11usize], shift),
i8::shl(a[12usize], shift),
i8::shl(a[13usize], shift),
i8::shl(a[14usize], shift),
i8::shl(a[15usize], shift),
]
.simd_into(self)
}
#[inline(always)]
fn shlv_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
[
i8::shl(a[0usize], &b[0usize]),
i8::shl(a[1usize], &b[1usize]),
i8::shl(a[2usize], &b[2usize]),
i8::shl(a[3usize], &b[3usize]),
i8::shl(a[4usize], &b[4usize]),
i8::shl(a[5usize], &b[5usize]),
i8::shl(a[6usize], &b[6usize]),
i8::shl(a[7usize], &b[7usize]),
i8::shl(a[8usize], &b[8usize]),
i8::shl(a[9usize], &b[9usize]),
i8::shl(a[10usize], &b[10usize]),
i8::shl(a[11usize], &b[11usize]),
i8::shl(a[12usize], &b[12usize]),
i8::shl(a[13usize], &b[13usize]),
i8::shl(a[14usize], &b[14usize]),
i8::shl(a[15usize], &b[15usize]),
]
.simd_into(self)
}
#[inline(always)]
fn shr_i8x16(self, a: i8x16<Self>, shift: u32) -> i8x16<Self> {
[
i8::shr(a[0usize], shift),
i8::shr(a[1usize], shift),
i8::shr(a[2usize], shift),
i8::shr(a[3usize], shift),
i8::shr(a[4usize], shift),
i8::shr(a[5usize], shift),
i8::shr(a[6usize], shift),
i8::shr(a[7usize], shift),
i8::shr(a[8usize], shift),
i8::shr(a[9usize], shift),
i8::shr(a[10usize], shift),
i8::shr(a[11usize], shift),
i8::shr(a[12usize], shift),
i8::shr(a[13usize], shift),
i8::shr(a[14usize], shift),
i8::shr(a[15usize], shift),
]
.simd_into(self)
}
#[inline(always)]
fn shrv_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
[
i8::shr(a[0usize], &b[0usize]),
i8::shr(a[1usize], &b[1usize]),
i8::shr(a[2usize], &b[2usize]),
i8::shr(a[3usize], &b[3usize]),
i8::shr(a[4usize], &b[4usize]),
i8::shr(a[5usize], &b[5usize]),
i8::shr(a[6usize], &b[6usize]),
i8::shr(a[7usize], &b[7usize]),
i8::shr(a[8usize], &b[8usize]),
i8::shr(a[9usize], &b[9usize]),
i8::shr(a[10usize], &b[10usize]),
i8::shr(a[11usize], &b[11usize]),
i8::shr(a[12usize], &b[12usize]),
i8::shr(a[13usize], &b[13usize]),
i8::shr(a[14usize], &b[14usize]),
i8::shr(a[15usize], &b[15usize]),
]
.simd_into(self)
}
#[inline(always)]
fn simd_eq_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> mask8x16<Self> {
[
-(i8::eq(&a[0usize], &b[0usize]) as i8),
-(i8::eq(&a[1usize], &b[1usize]) as i8),
-(i8::eq(&a[2usize], &b[2usize]) as i8),
-(i8::eq(&a[3usize], &b[3usize]) as i8),
-(i8::eq(&a[4usize], &b[4usize]) as i8),
-(i8::eq(&a[5usize], &b[5usize]) as i8),
-(i8::eq(&a[6usize], &b[6usize]) as i8),
-(i8::eq(&a[7usize], &b[7usize]) as i8),
-(i8::eq(&a[8usize], &b[8usize]) as i8),
-(i8::eq(&a[9usize], &b[9usize]) as i8),
-(i8::eq(&a[10usize], &b[10usize]) as i8),
-(i8::eq(&a[11usize], &b[11usize]) as i8),
-(i8::eq(&a[12usize], &b[12usize]) as i8),
-(i8::eq(&a[13usize], &b[13usize]) as i8),
-(i8::eq(&a[14usize], &b[14usize]) as i8),
-(i8::eq(&a[15usize], &b[15usize]) as i8),
]
.simd_into(self)
}
#[inline(always)]
fn simd_lt_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> mask8x16<Self> {
[
-(i8::lt(&a[0usize], &b[0usize]) as i8),
-(i8::lt(&a[1usize], &b[1usize]) as i8),
-(i8::lt(&a[2usize], &b[2usize]) as i8),
-(i8::lt(&a[3usize], &b[3usize]) as i8),
-(i8::lt(&a[4usize], &b[4usize]) as i8),
-(i8::lt(&a[5usize], &b[5usize]) as i8),
-(i8::lt(&a[6usize], &b[6usize]) as i8),
-(i8::lt(&a[7usize], &b[7usize]) as i8),
-(i8::lt(&a[8usize], &b[8usize]) as i8),
-(i8::lt(&a[9usize], &b[9usize]) as i8),
-(i8::lt(&a[10usize], &b[10usize]) as i8),
-(i8::lt(&a[11usize], &b[11usize]) as i8),
-(i8::lt(&a[12usize], &b[12usize]) as i8),
-(i8::lt(&a[13usize], &b[13usize]) as i8),
-(i8::lt(&a[14usize], &b[14usize]) as i8),
-(i8::lt(&a[15usize], &b[15usize]) as i8),
]
.simd_into(self)
}
#[inline(always)]
fn simd_le_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> mask8x16<Self> {
[
-(i8::le(&a[0usize], &b[0usize]) as i8),
-(i8::le(&a[1usize], &b[1usize]) as i8),
-(i8::le(&a[2usize], &b[2usize]) as i8),
-(i8::le(&a[3usize], &b[3usize]) as i8),
-(i8::le(&a[4usize], &b[4usize]) as i8),
-(i8::le(&a[5usize], &b[5usize]) as i8),
-(i8::le(&a[6usize], &b[6usize]) as i8),
-(i8::le(&a[7usize], &b[7usize]) as i8),
-(i8::le(&a[8usize], &b[8usize]) as i8),
-(i8::le(&a[9usize], &b[9usize]) as i8),
-(i8::le(&a[10usize], &b[10usize]) as i8),
-(i8::le(&a[11usize], &b[11usize]) as i8),
-(i8::le(&a[12usize], &b[12usize]) as i8),
-(i8::le(&a[13usize], &b[13usize]) as i8),
-(i8::le(&a[14usize], &b[14usize]) as i8),
-(i8::le(&a[15usize], &b[15usize]) as i8),
]
.simd_into(self)
}
#[inline(always)]
fn simd_ge_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> mask8x16<Self> {
[
-(i8::ge(&a[0usize], &b[0usize]) as i8),
-(i8::ge(&a[1usize], &b[1usize]) as i8),
-(i8::ge(&a[2usize], &b[2usize]) as i8),
-(i8::ge(&a[3usize], &b[3usize]) as i8),
-(i8::ge(&a[4usize], &b[4usize]) as i8),
-(i8::ge(&a[5usize], &b[5usize]) as i8),
-(i8::ge(&a[6usize], &b[6usize]) as i8),
-(i8::ge(&a[7usize], &b[7usize]) as i8),
-(i8::ge(&a[8usize], &b[8usize]) as i8),
-(i8::ge(&a[9usize], &b[9usize]) as i8),
-(i8::ge(&a[10usize], &b[10usize]) as i8),
-(i8::ge(&a[11usize], &b[11usize]) as i8),
-(i8::ge(&a[12usize], &b[12usize]) as i8),
-(i8::ge(&a[13usize], &b[13usize]) as i8),
-(i8::ge(&a[14usize], &b[14usize]) as i8),
-(i8::ge(&a[15usize], &b[15usize]) as i8),
]
.simd_into(self)
}
#[inline(always)]
fn simd_gt_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> mask8x16<Self> {
[
-(i8::gt(&a[0usize], &b[0usize]) as i8),
-(i8::gt(&a[1usize], &b[1usize]) as i8),
-(i8::gt(&a[2usize], &b[2usize]) as i8),
-(i8::gt(&a[3usize], &b[3usize]) as i8),
-(i8::gt(&a[4usize], &b[4usize]) as i8),
-(i8::gt(&a[5usize], &b[5usize]) as i8),
-(i8::gt(&a[6usize], &b[6usize]) as i8),
-(i8::gt(&a[7usize], &b[7usize]) as i8),
-(i8::gt(&a[8usize], &b[8usize]) as i8),
-(i8::gt(&a[9usize], &b[9usize]) as i8),
-(i8::gt(&a[10usize], &b[10usize]) as i8),
-(i8::gt(&a[11usize], &b[11usize]) as i8),
-(i8::gt(&a[12usize], &b[12usize]) as i8),
-(i8::gt(&a[13usize], &b[13usize]) as i8),
-(i8::gt(&a[14usize], &b[14usize]) as i8),
-(i8::gt(&a[15usize], &b[15usize]) as i8),
]
.simd_into(self)
}
#[inline(always)]
fn zip_low_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
[
a[0usize], b[0usize], a[1usize], b[1usize], a[2usize], b[2usize], a[3usize], b[3usize],
a[4usize], b[4usize], a[5usize], b[5usize], a[6usize], b[6usize], a[7usize], b[7usize],
]
.simd_into(self)
}
#[inline(always)]
fn zip_high_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
[
a[8usize], b[8usize], a[9usize], b[9usize], a[10usize], b[10usize], a[11usize],
b[11usize], a[12usize], b[12usize], a[13usize], b[13usize], a[14usize], b[14usize],
a[15usize], b[15usize],
]
.simd_into(self)
}
#[inline(always)]
fn unzip_low_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
[
a[0usize], a[2usize], a[4usize], a[6usize], a[8usize], a[10usize], a[12usize],
a[14usize], b[0usize], b[2usize], b[4usize], b[6usize], b[8usize], b[10usize],
b[12usize], b[14usize],
]
.simd_into(self)
}
#[inline(always)]
fn unzip_high_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
[
a[1usize], a[3usize], a[5usize], a[7usize], a[9usize], a[11usize], a[13usize],
a[15usize], b[1usize], b[3usize], b[5usize], b[7usize], b[9usize], b[11usize],
b[13usize], b[15usize],
]
.simd_into(self)
}
#[inline(always)]
fn select_i8x16(self, a: mask8x16<Self>, b: i8x16<Self>, c: i8x16<Self>) -> i8x16<Self> {
[
if a[0usize] != 0 { b[0usize] } else { c[0usize] },
if a[1usize] != 0 { b[1usize] } else { c[1usize] },
if a[2usize] != 0 { b[2usize] } else { c[2usize] },
if a[3usize] != 0 { b[3usize] } else { c[3usize] },
if a[4usize] != 0 { b[4usize] } else { c[4usize] },
if a[5usize] != 0 { b[5usize] } else { c[5usize] },
if a[6usize] != 0 { b[6usize] } else { c[6usize] },
if a[7usize] != 0 { b[7usize] } else { c[7usize] },
if a[8usize] != 0 { b[8usize] } else { c[8usize] },
if a[9usize] != 0 { b[9usize] } else { c[9usize] },
if a[10usize] != 0 {
b[10usize]
} else {
c[10usize]
},
if a[11usize] != 0 {
b[11usize]
} else {
c[11usize]
},
if a[12usize] != 0 {
b[12usize]
} else {
c[12usize]
},
if a[13usize] != 0 {
b[13usize]
} else {
c[13usize]
},
if a[14usize] != 0 {
b[14usize]
} else {
c[14usize]
},
if a[15usize] != 0 {
b[15usize]
} else {
c[15usize]
},
]
.simd_into(self)
}
#[inline(always)]
fn min_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
[
i8::min(a[0usize], b[0usize]),
i8::min(a[1usize], b[1usize]),
i8::min(a[2usize], b[2usize]),
i8::min(a[3usize], b[3usize]),
i8::min(a[4usize], b[4usize]),
i8::min(a[5usize], b[5usize]),
i8::min(a[6usize], b[6usize]),
i8::min(a[7usize], b[7usize]),
i8::min(a[8usize], b[8usize]),
i8::min(a[9usize], b[9usize]),
i8::min(a[10usize], b[10usize]),
i8::min(a[11usize], b[11usize]),
i8::min(a[12usize], b[12usize]),
i8::min(a[13usize], b[13usize]),
i8::min(a[14usize], b[14usize]),
i8::min(a[15usize], b[15usize]),
]
.simd_into(self)
}
#[inline(always)]
fn max_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
[
i8::max(a[0usize], b[0usize]),
i8::max(a[1usize], b[1usize]),
i8::max(a[2usize], b[2usize]),
i8::max(a[3usize], b[3usize]),
i8::max(a[4usize], b[4usize]),
i8::max(a[5usize], b[5usize]),
i8::max(a[6usize], b[6usize]),
i8::max(a[7usize], b[7usize]),
i8::max(a[8usize], b[8usize]),
i8::max(a[9usize], b[9usize]),
i8::max(a[10usize], b[10usize]),
i8::max(a[11usize], b[11usize]),
i8::max(a[12usize], b[12usize]),
i8::max(a[13usize], b[13usize]),
i8::max(a[14usize], b[14usize]),
i8::max(a[15usize], b[15usize]),
]
.simd_into(self)
}
#[inline(always)]
fn combine_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x32<Self> {
let mut result = [0; 32usize];
result[0..16usize].copy_from_slice(&a.val.0);
result[16usize..32usize].copy_from_slice(&b.val.0);
result.simd_into(self)
}
#[inline(always)]
fn neg_i8x16(self, a: i8x16<Self>) -> i8x16<Self> {
[
i8::neg(a[0usize]),
i8::neg(a[1usize]),
i8::neg(a[2usize]),
i8::neg(a[3usize]),
i8::neg(a[4usize]),
i8::neg(a[5usize]),
i8::neg(a[6usize]),
i8::neg(a[7usize]),
i8::neg(a[8usize]),
i8::neg(a[9usize]),
i8::neg(a[10usize]),
i8::neg(a[11usize]),
i8::neg(a[12usize]),
i8::neg(a[13usize]),
i8::neg(a[14usize]),
i8::neg(a[15usize]),
]
.simd_into(self)
}
#[inline(always)]
fn reinterpret_u8_i8x16(self, a: i8x16<Self>) -> u8x16<Self> {
a.bitcast()
}
#[inline(always)]
fn reinterpret_u32_i8x16(self, a: i8x16<Self>) -> u32x4<Self> {
a.bitcast()
}
#[inline(always)]
fn splat_u8x16(self, val: u8) -> u8x16<Self> {
[val; 16usize].simd_into(self)
}
#[inline(always)]
fn load_array_u8x16(self, val: [u8; 16usize]) -> u8x16<Self> {
u8x16 {
val: crate::support::Aligned128(val),
simd: self,
}
}
#[inline(always)]
fn load_array_ref_u8x16(self, val: &[u8; 16usize]) -> u8x16<Self> {
u8x16 {
val: crate::support::Aligned128(*val),
simd: self,
}
}
#[inline(always)]
fn as_array_u8x16(self, a: u8x16<Self>) -> [u8; 16usize] {
a.val.0
}
#[inline(always)]
fn as_array_ref_u8x16(self, a: &u8x16<Self>) -> &[u8; 16usize] {
&a.val.0
}
#[inline(always)]
fn as_array_mut_u8x16(self, a: &mut u8x16<Self>) -> &mut [u8; 16usize] {
&mut a.val.0
}
#[inline(always)]
fn store_array_u8x16(self, a: u8x16<Self>, dest: &mut [u8; 16usize]) {
*dest = a.val.0;
}
#[inline(always)]
fn cvt_from_bytes_u8x16(self, a: u8x16<Self>) -> u8x16<Self> {
// Already a byte vector; the conversion is the identity.
a
}
#[inline(always)]
fn cvt_to_bytes_u8x16(self, a: u8x16<Self>) -> u8x16<Self> {
a
}
#[inline(always)]
fn slide_u8x16<const SHIFT: usize>(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
let mut dest = [Default::default(); 16usize];
dest[..16usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
dest[16usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
dest.simd_into(self)
}
#[inline(always)]
fn slide_within_blocks_u8x16<const SHIFT: usize>(
self,
a: u8x16<Self>,
b: u8x16<Self>,
) -> u8x16<Self> {
self.slide_u8x16::<SHIFT>(a, b)
}
#[inline(always)]
fn add_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
[
u8::wrapping_add(a[0usize], b[0usize]),
u8::wrapping_add(a[1usize], b[1usize]),
u8::wrapping_add(a[2usize], b[2usize]),
u8::wrapping_add(a[3usize], b[3usize]),
u8::wrapping_add(a[4usize], b[4usize]),
u8::wrapping_add(a[5usize], b[5usize]),
u8::wrapping_add(a[6usize], b[6usize]),
u8::wrapping_add(a[7usize], b[7usize]),
u8::wrapping_add(a[8usize], b[8usize]),
u8::wrapping_add(a[9usize], b[9usize]),
u8::wrapping_add(a[10usize], b[10usize]),
u8::wrapping_add(a[11usize], b[11usize]),
u8::wrapping_add(a[12usize], b[12usize]),
u8::wrapping_add(a[13usize], b[13usize]),
u8::wrapping_add(a[14usize], b[14usize]),
u8::wrapping_add(a[15usize], b[15usize]),
]
.simd_into(self)
}
#[inline(always)]
fn sub_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
[
u8::wrapping_sub(a[0usize], b[0usize]),
u8::wrapping_sub(a[1usize], b[1usize]),
u8::wrapping_sub(a[2usize], b[2usize]),
u8::wrapping_sub(a[3usize], b[3usize]),
u8::wrapping_sub(a[4usize], b[4usize]),
u8::wrapping_sub(a[5usize], b[5usize]),
u8::wrapping_sub(a[6usize], b[6usize]),
u8::wrapping_sub(a[7usize], b[7usize]),
u8::wrapping_sub(a[8usize], b[8usize]),
u8::wrapping_sub(a[9usize], b[9usize]),
u8::wrapping_sub(a[10usize], b[10usize]),
u8::wrapping_sub(a[11usize], b[11usize]),
u8::wrapping_sub(a[12usize], b[12usize]),
u8::wrapping_sub(a[13usize], b[13usize]),
u8::wrapping_sub(a[14usize], b[14usize]),
u8::wrapping_sub(a[15usize], b[15usize]),
]
.simd_into(self)
}
#[inline(always)]
fn mul_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
[
u8::wrapping_mul(a[0usize], b[0usize]),
u8::wrapping_mul(a[1usize], b[1usize]),
u8::wrapping_mul(a[2usize], b[2usize]),
u8::wrapping_mul(a[3usize], b[3usize]),
u8::wrapping_mul(a[4usize], b[4usize]),
u8::wrapping_mul(a[5usize], b[5usize]),
u8::wrapping_mul(a[6usize], b[6usize]),
u8::wrapping_mul(a[7usize], b[7usize]),
u8::wrapping_mul(a[8usize], b[8usize]),
u8::wrapping_mul(a[9usize], b[9usize]),
u8::wrapping_mul(a[10usize], b[10usize]),
u8::wrapping_mul(a[11usize], b[11usize]),
u8::wrapping_mul(a[12usize], b[12usize]),
u8::wrapping_mul(a[13usize], b[13usize]),
u8::wrapping_mul(a[14usize], b[14usize]),
u8::wrapping_mul(a[15usize], b[15usize]),
]
.simd_into(self)
}
#[inline(always)]
fn and_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
[
u8::bitand(a[0usize], &b[0usize]),
u8::bitand(a[1usize], &b[1usize]),
u8::bitand(a[2usize], &b[2usize]),
u8::bitand(a[3usize], &b[3usize]),
u8::bitand(a[4usize], &b[4usize]),
u8::bitand(a[5usize], &b[5usize]),
u8::bitand(a[6usize], &b[6usize]),
u8::bitand(a[7usize], &b[7usize]),
u8::bitand(a[8usize], &b[8usize]),
u8::bitand(a[9usize], &b[9usize]),
u8::bitand(a[10usize], &b[10usize]),
u8::bitand(a[11usize], &b[11usize]),
u8::bitand(a[12usize], &b[12usize]),
u8::bitand(a[13usize], &b[13usize]),
u8::bitand(a[14usize], &b[14usize]),
u8::bitand(a[15usize], &b[15usize]),
]
.simd_into(self)
}
#[inline(always)]
fn or_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
[
u8::bitor(a[0usize], &b[0usize]),
u8::bitor(a[1usize], &b[1usize]),
u8::bitor(a[2usize], &b[2usize]),
u8::bitor(a[3usize], &b[3usize]),
u8::bitor(a[4usize], &b[4usize]),
u8::bitor(a[5usize], &b[5usize]),
u8::bitor(a[6usize], &b[6usize]),
u8::bitor(a[7usize], &b[7usize]),
u8::bitor(a[8usize], &b[8usize]),
u8::bitor(a[9usize], &b[9usize]),
u8::bitor(a[10usize], &b[10usize]),
u8::bitor(a[11usize], &b[11usize]),
u8::bitor(a[12usize], &b[12usize]),
u8::bitor(a[13usize], &b[13usize]),
u8::bitor(a[14usize], &b[14usize]),
u8::bitor(a[15usize], &b[15usize]),
]
.simd_into(self)
}
#[inline(always)]
fn xor_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
[
u8::bitxor(a[0usize], &b[0usize]),
u8::bitxor(a[1usize], &b[1usize]),
u8::bitxor(a[2usize], &b[2usize]),
u8::bitxor(a[3usize], &b[3usize]),
u8::bitxor(a[4usize], &b[4usize]),
u8::bitxor(a[5usize], &b[5usize]),
u8::bitxor(a[6usize], &b[6usize]),
u8::bitxor(a[7usize], &b[7usize]),
u8::bitxor(a[8usize], &b[8usize]),
u8::bitxor(a[9usize], &b[9usize]),
u8::bitxor(a[10usize], &b[10usize]),
u8::bitxor(a[11usize], &b[11usize]),
u8::bitxor(a[12usize], &b[12usize]),
u8::bitxor(a[13usize], &b[13usize]),
u8::bitxor(a[14usize], &b[14usize]),
u8::bitxor(a[15usize], &b[15usize]),
]
.simd_into(self)
}
#[inline(always)]
fn not_u8x16(self, a: u8x16<Self>) -> u8x16<Self> {
[
u8::not(a[0usize]),
u8::not(a[1usize]),
u8::not(a[2usize]),
u8::not(a[3usize]),
u8::not(a[4usize]),
u8::not(a[5usize]),
u8::not(a[6usize]),
u8::not(a[7usize]),
u8::not(a[8usize]),
u8::not(a[9usize]),
u8::not(a[10usize]),
u8::not(a[11usize]),
u8::not(a[12usize]),
u8::not(a[13usize]),
u8::not(a[14usize]),
u8::not(a[15usize]),
]
.simd_into(self)
}
#[inline(always)]
fn shl_u8x16(self, a: u8x16<Self>, shift: u32) -> u8x16<Self> {
[
u8::shl(a[0usize], shift),
u8::shl(a[1usize], shift),
u8::shl(a[2usize], shift),
u8::shl(a[3usize], shift),
u8::shl(a[4usize], shift),
u8::shl(a[5usize], shift),
u8::shl(a[6usize], shift),
u8::shl(a[7usize], shift),
u8::shl(a[8usize], shift),
u8::shl(a[9usize], shift),
u8::shl(a[10usize], shift),
u8::shl(a[11usize], shift),
u8::shl(a[12usize], shift),
u8::shl(a[13usize], shift),
u8::shl(a[14usize], shift),
u8::shl(a[15usize], shift),
]
.simd_into(self)
}
#[inline(always)]
fn shlv_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
[
u8::shl(a[0usize], &b[0usize]),
u8::shl(a[1usize], &b[1usize]),
u8::shl(a[2usize], &b[2usize]),
u8::shl(a[3usize], &b[3usize]),
u8::shl(a[4usize], &b[4usize]),
u8::shl(a[5usize], &b[5usize]),
u8::shl(a[6usize], &b[6usize]),
u8::shl(a[7usize], &b[7usize]),
u8::shl(a[8usize], &b[8usize]),
u8::shl(a[9usize], &b[9usize]),
u8::shl(a[10usize], &b[10usize]),
u8::shl(a[11usize], &b[11usize]),
u8::shl(a[12usize], &b[12usize]),
u8::shl(a[13usize], &b[13usize]),
u8::shl(a[14usize], &b[14usize]),
u8::shl(a[15usize], &b[15usize]),
]
.simd_into(self)
}
#[inline(always)]
fn shr_u8x16(self, a: u8x16<Self>, shift: u32) -> u8x16<Self> {
[
u8::shr(a[0usize], shift),
u8::shr(a[1usize], shift),
u8::shr(a[2usize], shift),
u8::shr(a[3usize], shift),
u8::shr(a[4usize], shift),
u8::shr(a[5usize], shift),
u8::shr(a[6usize], shift),
u8::shr(a[7usize], shift),
u8::shr(a[8usize], shift),
u8::shr(a[9usize], shift),
u8::shr(a[10usize], shift),
u8::shr(a[11usize], shift),
u8::shr(a[12usize], shift),
u8::shr(a[13usize], shift),
u8::shr(a[14usize], shift),
u8::shr(a[15usize], shift),
]
.simd_into(self)
}
#[inline(always)]
fn shrv_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
[
u8::shr(a[0usize], &b[0usize]),
u8::shr(a[1usize], &b[1usize]),
u8::shr(a[2usize], &b[2usize]),
u8::shr(a[3usize], &b[3usize]),
u8::shr(a[4usize], &b[4usize]),
u8::shr(a[5usize], &b[5usize]),
u8::shr(a[6usize], &b[6usize]),
u8::shr(a[7usize], &b[7usize]),
u8::shr(a[8usize], &b[8usize]),
u8::shr(a[9usize], &b[9usize]),
u8::shr(a[10usize], &b[10usize]),
u8::shr(a[11usize], &b[11usize]),
u8::shr(a[12usize], &b[12usize]),
u8::shr(a[13usize], &b[13usize]),
u8::shr(a[14usize], &b[14usize]),
u8::shr(a[15usize], &b[15usize]),
]
.simd_into(self)
}
#[inline(always)]
fn simd_eq_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> mask8x16<Self> {
[
-(u8::eq(&a[0usize], &b[0usize]) as i8),
-(u8::eq(&a[1usize], &b[1usize]) as i8),
-(u8::eq(&a[2usize], &b[2usize]) as i8),
-(u8::eq(&a[3usize], &b[3usize]) as i8),
-(u8::eq(&a[4usize], &b[4usize]) as i8),
-(u8::eq(&a[5usize], &b[5usize]) as i8),
-(u8::eq(&a[6usize], &b[6usize]) as i8),
-(u8::eq(&a[7usize], &b[7usize]) as i8),
-(u8::eq(&a[8usize], &b[8usize]) as i8),
-(u8::eq(&a[9usize], &b[9usize]) as i8),
-(u8::eq(&a[10usize], &b[10usize]) as i8),
-(u8::eq(&a[11usize], &b[11usize]) as i8),
-(u8::eq(&a[12usize], &b[12usize]) as i8),
-(u8::eq(&a[13usize], &b[13usize]) as i8),
-(u8::eq(&a[14usize], &b[14usize]) as i8),
-(u8::eq(&a[15usize], &b[15usize]) as i8),
]
.simd_into(self)
}
#[inline(always)]
fn simd_lt_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> mask8x16<Self> {
[
-(u8::lt(&a[0usize], &b[0usize]) as i8),
-(u8::lt(&a[1usize], &b[1usize]) as i8),
-(u8::lt(&a[2usize], &b[2usize]) as i8),
-(u8::lt(&a[3usize], &b[3usize]) as i8),
-(u8::lt(&a[4usize], &b[4usize]) as i8),
-(u8::lt(&a[5usize], &b[5usize]) as i8),
-(u8::lt(&a[6usize], &b[6usize]) as i8),
-(u8::lt(&a[7usize], &b[7usize]) as i8),
-(u8::lt(&a[8usize], &b[8usize]) as i8),
-(u8::lt(&a[9usize], &b[9usize]) as i8),
-(u8::lt(&a[10usize], &b[10usize]) as i8),
-(u8::lt(&a[11usize], &b[11usize]) as i8),
-(u8::lt(&a[12usize], &b[12usize]) as i8),
-(u8::lt(&a[13usize], &b[13usize]) as i8),
-(u8::lt(&a[14usize], &b[14usize]) as i8),
-(u8::lt(&a[15usize], &b[15usize]) as i8),
]
.simd_into(self)
}
#[inline(always)]
fn simd_le_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> mask8x16<Self> {
[
-(u8::le(&a[0usize], &b[0usize]) as i8),
-(u8::le(&a[1usize], &b[1usize]) as i8),
-(u8::le(&a[2usize], &b[2usize]) as i8),
-(u8::le(&a[3usize], &b[3usize]) as i8),
-(u8::le(&a[4usize], &b[4usize]) as i8),
-(u8::le(&a[5usize], &b[5usize]) as i8),
-(u8::le(&a[6usize], &b[6usize]) as i8),
-(u8::le(&a[7usize], &b[7usize]) as i8),
-(u8::le(&a[8usize], &b[8usize]) as i8),
-(u8::le(&a[9usize], &b[9usize]) as i8),
-(u8::le(&a[10usize], &b[10usize]) as i8),
-(u8::le(&a[11usize], &b[11usize]) as i8),
-(u8::le(&a[12usize], &b[12usize]) as i8),
-(u8::le(&a[13usize], &b[13usize]) as i8),
-(u8::le(&a[14usize], &b[14usize]) as i8),
-(u8::le(&a[15usize], &b[15usize]) as i8),
]
.simd_into(self)
}
#[inline(always)]
fn simd_ge_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> mask8x16<Self> {
[
-(u8::ge(&a[0usize], &b[0usize]) as i8),
-(u8::ge(&a[1usize], &b[1usize]) as i8),
-(u8::ge(&a[2usize], &b[2usize]) as i8),
-(u8::ge(&a[3usize], &b[3usize]) as i8),
-(u8::ge(&a[4usize], &b[4usize]) as i8),
-(u8::ge(&a[5usize], &b[5usize]) as i8),
-(u8::ge(&a[6usize], &b[6usize]) as i8),
-(u8::ge(&a[7usize], &b[7usize]) as i8),
-(u8::ge(&a[8usize], &b[8usize]) as i8),
-(u8::ge(&a[9usize], &b[9usize]) as i8),
-(u8::ge(&a[10usize], &b[10usize]) as i8),
-(u8::ge(&a[11usize], &b[11usize]) as i8),
-(u8::ge(&a[12usize], &b[12usize]) as i8),
-(u8::ge(&a[13usize], &b[13usize]) as i8),
-(u8::ge(&a[14usize], &b[14usize]) as i8),
-(u8::ge(&a[15usize], &b[15usize]) as i8),
]
.simd_into(self)
}
#[inline(always)]
fn simd_gt_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> mask8x16<Self> {
[
-(u8::gt(&a[0usize], &b[0usize]) as i8),
-(u8::gt(&a[1usize], &b[1usize]) as i8),
-(u8::gt(&a[2usize], &b[2usize]) as i8),
-(u8::gt(&a[3usize], &b[3usize]) as i8),
-(u8::gt(&a[4usize], &b[4usize]) as i8),
-(u8::gt(&a[5usize], &b[5usize]) as i8),
-(u8::gt(&a[6usize], &b[6usize]) as i8),
-(u8::gt(&a[7usize], &b[7usize]) as i8),
-(u8::gt(&a[8usize], &b[8usize]) as i8),
-(u8::gt(&a[9usize], &b[9usize]) as i8),
-(u8::gt(&a[10usize], &b[10usize]) as i8),
-(u8::gt(&a[11usize], &b[11usize]) as i8),
-(u8::gt(&a[12usize], &b[12usize]) as i8),
-(u8::gt(&a[13usize], &b[13usize]) as i8),
-(u8::gt(&a[14usize], &b[14usize]) as i8),
-(u8::gt(&a[15usize], &b[15usize]) as i8),
]
.simd_into(self)
}
#[inline(always)]
fn zip_low_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
[
a[0usize], b[0usize], a[1usize], b[1usize], a[2usize], b[2usize], a[3usize], b[3usize],
a[4usize], b[4usize], a[5usize], b[5usize], a[6usize], b[6usize], a[7usize], b[7usize],
]
.simd_into(self)
}
#[inline(always)]
fn zip_high_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
[
a[8usize], b[8usize], a[9usize], b[9usize], a[10usize], b[10usize], a[11usize],
b[11usize], a[12usize], b[12usize], a[13usize], b[13usize], a[14usize], b[14usize],
a[15usize], b[15usize],
]
.simd_into(self)
}
#[inline(always)]
fn unzip_low_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
[
a[0usize], a[2usize], a[4usize], a[6usize], a[8usize], a[10usize], a[12usize],
a[14usize], b[0usize], b[2usize], b[4usize], b[6usize], b[8usize], b[10usize],
b[12usize], b[14usize],
]
.simd_into(self)
}
#[inline(always)]
fn unzip_high_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
[
a[1usize], a[3usize], a[5usize], a[7usize], a[9usize], a[11usize], a[13usize],
a[15usize], b[1usize], b[3usize], b[5usize], b[7usize], b[9usize], b[11usize],
b[13usize], b[15usize],
]
.simd_into(self)
}
#[inline(always)]
fn select_u8x16(self, a: mask8x16<Self>, b: u8x16<Self>, c: u8x16<Self>) -> u8x16<Self> {
[
if a[0usize] != 0 { b[0usize] } else { c[0usize] },
if a[1usize] != 0 { b[1usize] } else { c[1usize] },
if a[2usize] != 0 { b[2usize] } else { c[2usize] },
if a[3usize] != 0 { b[3usize] } else { c[3usize] },
if a[4usize] != 0 { b[4usize] } else { c[4usize] },
if a[5usize] != 0 { b[5usize] } else { c[5usize] },
if a[6usize] != 0 { b[6usize] } else { c[6usize] },
if a[7usize] != 0 { b[7usize] } else { c[7usize] },
if a[8usize] != 0 { b[8usize] } else { c[8usize] },
if a[9usize] != 0 { b[9usize] } else { c[9usize] },
if a[10usize] != 0 {
b[10usize]
} else {
c[10usize]
},
if a[11usize] != 0 {
b[11usize]
} else {
c[11usize]
},
if a[12usize] != 0 {
b[12usize]
} else {
c[12usize]
},
if a[13usize] != 0 {
b[13usize]
} else {
c[13usize]
},
if a[14usize] != 0 {
b[14usize]
} else {
c[14usize]
},
if a[15usize] != 0 {
b[15usize]
} else {
c[15usize]
},
]
.simd_into(self)
}
#[inline(always)]
fn min_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
[
u8::min(a[0usize], b[0usize]),
u8::min(a[1usize], b[1usize]),
u8::min(a[2usize], b[2usize]),
u8::min(a[3usize], b[3usize]),
u8::min(a[4usize], b[4usize]),
u8::min(a[5usize], b[5usize]),
u8::min(a[6usize], b[6usize]),
u8::min(a[7usize], b[7usize]),
u8::min(a[8usize], b[8usize]),
u8::min(a[9usize], b[9usize]),
u8::min(a[10usize], b[10usize]),
u8::min(a[11usize], b[11usize]),
u8::min(a[12usize], b[12usize]),
u8::min(a[13usize], b[13usize]),
u8::min(a[14usize], b[14usize]),
u8::min(a[15usize], b[15usize]),
]
.simd_into(self)
}
#[inline(always)]
fn max_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
[
u8::max(a[0usize], b[0usize]),
u8::max(a[1usize], b[1usize]),
u8::max(a[2usize], b[2usize]),
u8::max(a[3usize], b[3usize]),
u8::max(a[4usize], b[4usize]),
u8::max(a[5usize], b[5usize]),
u8::max(a[6usize], b[6usize]),
u8::max(a[7usize], b[7usize]),
u8::max(a[8usize], b[8usize]),
u8::max(a[9usize], b[9usize]),
u8::max(a[10usize], b[10usize]),
u8::max(a[11usize], b[11usize]),
u8::max(a[12usize], b[12usize]),
u8::max(a[13usize], b[13usize]),
u8::max(a[14usize], b[14usize]),
u8::max(a[15usize], b[15usize]),
]
.simd_into(self)
}
#[inline(always)]
fn combine_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x32<Self> {
let mut result = [0; 32usize];
result[0..16usize].copy_from_slice(&a.val.0);
result[16usize..32usize].copy_from_slice(&b.val.0);
result.simd_into(self)
}
#[inline(always)]
fn widen_u8x16(self, a: u8x16<Self>) -> u16x16<Self> {
[
a[0usize] as u16,
a[1usize] as u16,
a[2usize] as u16,
a[3usize] as u16,
a[4usize] as u16,
a[5usize] as u16,
a[6usize] as u16,
a[7usize] as u16,
a[8usize] as u16,
a[9usize] as u16,
a[10usize] as u16,
a[11usize] as u16,
a[12usize] as u16,
a[13usize] as u16,
a[14usize] as u16,
a[15usize] as u16,
]
.simd_into(self)
}
#[inline(always)]
fn reinterpret_u32_u8x16(self, a: u8x16<Self>) -> u32x4<Self> {
a.bitcast()
}
#[inline(always)]
fn splat_mask8x16(self, val: i8) -> mask8x16<Self> {
[val; 16usize].simd_into(self)
}
#[inline(always)]
fn load_array_mask8x16(self, val: [i8; 16usize]) -> mask8x16<Self> {
mask8x16 {
val: crate::support::Aligned128(val),
simd: self,
}
}
#[inline(always)]
fn load_array_ref_mask8x16(self, val: &[i8; 16usize]) -> mask8x16<Self> {
mask8x16 {
val: crate::support::Aligned128(*val),
simd: self,
}
}
#[inline(always)]
fn as_array_mask8x16(self, a: mask8x16<Self>) -> [i8; 16usize] {
a.val.0
}
#[inline(always)]
fn as_array_ref_mask8x16(self, a: &mask8x16<Self>) -> &[i8; 16usize] {
&a.val.0
}
#[inline(always)]
fn as_array_mut_mask8x16(self, a: &mut mask8x16<Self>) -> &mut [i8; 16usize] {
&mut a.val.0
}
#[inline(always)]
fn store_array_mask8x16(self, a: mask8x16<Self>, dest: &mut [i8; 16usize]) {
*dest = a.val.0;
}
#[inline(always)]
fn cvt_from_bytes_mask8x16(self, a: u8x16<Self>) -> mask8x16<Self> {
unsafe {
mask8x16 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_mask8x16(self, a: mask8x16<Self>) -> u8x16<Self> {
unsafe {
u8x16 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn slide_mask8x16<const SHIFT: usize>(
self,
a: mask8x16<Self>,
b: mask8x16<Self>,
) -> mask8x16<Self> {
let mut dest = [Default::default(); 16usize];
dest[..16usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
dest[16usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
dest.simd_into(self)
}
#[inline(always)]
fn slide_within_blocks_mask8x16<const SHIFT: usize>(
self,
a: mask8x16<Self>,
b: mask8x16<Self>,
) -> mask8x16<Self> {
self.slide_mask8x16::<SHIFT>(a, b)
}
#[inline(always)]
fn and_mask8x16(self, a: mask8x16<Self>, b: mask8x16<Self>) -> mask8x16<Self> {
[
i8::bitand(a[0usize], &b[0usize]),
i8::bitand(a[1usize], &b[1usize]),
i8::bitand(a[2usize], &b[2usize]),
i8::bitand(a[3usize], &b[3usize]),
i8::bitand(a[4usize], &b[4usize]),
i8::bitand(a[5usize], &b[5usize]),
i8::bitand(a[6usize], &b[6usize]),
i8::bitand(a[7usize], &b[7usize]),
i8::bitand(a[8usize], &b[8usize]),
i8::bitand(a[9usize], &b[9usize]),
i8::bitand(a[10usize], &b[10usize]),
i8::bitand(a[11usize], &b[11usize]),
i8::bitand(a[12usize], &b[12usize]),
i8::bitand(a[13usize], &b[13usize]),
i8::bitand(a[14usize], &b[14usize]),
i8::bitand(a[15usize], &b[15usize]),
]
.simd_into(self)
}
#[inline(always)]
fn or_mask8x16(self, a: mask8x16<Self>, b: mask8x16<Self>) -> mask8x16<Self> {
[
i8::bitor(a[0usize], &b[0usize]),
i8::bitor(a[1usize], &b[1usize]),
i8::bitor(a[2usize], &b[2usize]),
i8::bitor(a[3usize], &b[3usize]),
i8::bitor(a[4usize], &b[4usize]),
i8::bitor(a[5usize], &b[5usize]),
i8::bitor(a[6usize], &b[6usize]),
i8::bitor(a[7usize], &b[7usize]),
i8::bitor(a[8usize], &b[8usize]),
i8::bitor(a[9usize], &b[9usize]),
i8::bitor(a[10usize], &b[10usize]),
i8::bitor(a[11usize], &b[11usize]),
i8::bitor(a[12usize], &b[12usize]),
i8::bitor(a[13usize], &b[13usize]),
i8::bitor(a[14usize], &b[14usize]),
i8::bitor(a[15usize], &b[15usize]),
]
.simd_into(self)
}
#[inline(always)]
fn xor_mask8x16(self, a: mask8x16<Self>, b: mask8x16<Self>) -> mask8x16<Self> {
[
i8::bitxor(a[0usize], &b[0usize]),
i8::bitxor(a[1usize], &b[1usize]),
i8::bitxor(a[2usize], &b[2usize]),
i8::bitxor(a[3usize], &b[3usize]),
i8::bitxor(a[4usize], &b[4usize]),
i8::bitxor(a[5usize], &b[5usize]),
i8::bitxor(a[6usize], &b[6usize]),
i8::bitxor(a[7usize], &b[7usize]),
i8::bitxor(a[8usize], &b[8usize]),
i8::bitxor(a[9usize], &b[9usize]),
i8::bitxor(a[10usize], &b[10usize]),
i8::bitxor(a[11usize], &b[11usize]),
i8::bitxor(a[12usize], &b[12usize]),
i8::bitxor(a[13usize], &b[13usize]),
i8::bitxor(a[14usize], &b[14usize]),
i8::bitxor(a[15usize], &b[15usize]),
]
.simd_into(self)
}
#[inline(always)]
fn not_mask8x16(self, a: mask8x16<Self>) -> mask8x16<Self> {
[
i8::not(a[0usize]),
i8::not(a[1usize]),
i8::not(a[2usize]),
i8::not(a[3usize]),
i8::not(a[4usize]),
i8::not(a[5usize]),
i8::not(a[6usize]),
i8::not(a[7usize]),
i8::not(a[8usize]),
i8::not(a[9usize]),
i8::not(a[10usize]),
i8::not(a[11usize]),
i8::not(a[12usize]),
i8::not(a[13usize]),
i8::not(a[14usize]),
i8::not(a[15usize]),
]
.simd_into(self)
}
#[inline(always)]
fn select_mask8x16(
self,
a: mask8x16<Self>,
b: mask8x16<Self>,
c: mask8x16<Self>,
) -> mask8x16<Self> {
[
if a[0usize] != 0 { b[0usize] } else { c[0usize] },
if a[1usize] != 0 { b[1usize] } else { c[1usize] },
if a[2usize] != 0 { b[2usize] } else { c[2usize] },
if a[3usize] != 0 { b[3usize] } else { c[3usize] },
if a[4usize] != 0 { b[4usize] } else { c[4usize] },
if a[5usize] != 0 { b[5usize] } else { c[5usize] },
if a[6usize] != 0 { b[6usize] } else { c[6usize] },
if a[7usize] != 0 { b[7usize] } else { c[7usize] },
if a[8usize] != 0 { b[8usize] } else { c[8usize] },
if a[9usize] != 0 { b[9usize] } else { c[9usize] },
if a[10usize] != 0 {
b[10usize]
} else {
c[10usize]
},
if a[11usize] != 0 {
b[11usize]
} else {
c[11usize]
},
if a[12usize] != 0 {
b[12usize]
} else {
c[12usize]
},
if a[13usize] != 0 {
b[13usize]
} else {
c[13usize]
},
if a[14usize] != 0 {
b[14usize]
} else {
c[14usize]
},
if a[15usize] != 0 {
b[15usize]
} else {
c[15usize]
},
]
.simd_into(self)
}
#[inline(always)]
fn simd_eq_mask8x16(self, a: mask8x16<Self>, b: mask8x16<Self>) -> mask8x16<Self> {
[
-(i8::eq(&a[0usize], &b[0usize]) as i8),
-(i8::eq(&a[1usize], &b[1usize]) as i8),
-(i8::eq(&a[2usize], &b[2usize]) as i8),
-(i8::eq(&a[3usize], &b[3usize]) as i8),
-(i8::eq(&a[4usize], &b[4usize]) as i8),
-(i8::eq(&a[5usize], &b[5usize]) as i8),
-(i8::eq(&a[6usize], &b[6usize]) as i8),
-(i8::eq(&a[7usize], &b[7usize]) as i8),
-(i8::eq(&a[8usize], &b[8usize]) as i8),
-(i8::eq(&a[9usize], &b[9usize]) as i8),
-(i8::eq(&a[10usize], &b[10usize]) as i8),
-(i8::eq(&a[11usize], &b[11usize]) as i8),
-(i8::eq(&a[12usize], &b[12usize]) as i8),
-(i8::eq(&a[13usize], &b[13usize]) as i8),
-(i8::eq(&a[14usize], &b[14usize]) as i8),
-(i8::eq(&a[15usize], &b[15usize]) as i8),
]
.simd_into(self)
}
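// Mask reductions: a lane counts as true iff it is nonzero; the generated
// code tests all 16 lanes explicitly rather than looping.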
#[inline(always)]
fn any_true_mask8x16(self, a: mask8x16<Self>) -> bool {
a[0usize] != 0
|| a[1usize] != 0
|| a[2usize] != 0
|| a[3usize] != 0
|| a[4usize] != 0
|| a[5usize] != 0
|| a[6usize] != 0
|| a[7usize] != 0
|| a[8usize] != 0
|| a[9usize] != 0
|| a[10usize] != 0
|| a[11usize] != 0
|| a[12usize] != 0
|| a[13usize] != 0
|| a[14usize] != 0
|| a[15usize] != 0
}
#[inline(always)]
fn all_true_mask8x16(self, a: mask8x16<Self>) -> bool {
a[0usize] != 0
&& a[1usize] != 0
&& a[2usize] != 0
&& a[3usize] != 0
&& a[4usize] != 0
&& a[5usize] != 0
&& a[6usize] != 0
&& a[7usize] != 0
&& a[8usize] != 0
&& a[9usize] != 0
&& a[10usize] != 0
&& a[11usize] != 0
&& a[12usize] != 0
&& a[13usize] != 0
&& a[14usize] != 0
&& a[15usize] != 0
}
#[inline(always)]
fn any_false_mask8x16(self, a: mask8x16<Self>) -> bool {
a[0usize] == 0
|| a[1usize] == 0
|| a[2usize] == 0
|| a[3usize] == 0
|| a[4usize] == 0
|| a[5usize] == 0
|| a[6usize] == 0
|| a[7usize] == 0
|| a[8usize] == 0
|| a[9usize] == 0
|| a[10usize] == 0
|| a[11usize] == 0
|| a[12usize] == 0
|| a[13usize] == 0
|| a[14usize] == 0
|| a[15usize] == 0
}
#[inline(always)]
fn all_false_mask8x16(self, a: mask8x16<Self>) -> bool {
a[0usize] == 0
&& a[1usize] == 0
&& a[2usize] == 0
&& a[3usize] == 0
&& a[4usize] == 0
&& a[5usize] == 0
&& a[6usize] == 0
&& a[7usize] == 0
&& a[8usize] == 0
&& a[9usize] == 0
&& a[10usize] == 0
&& a[11usize] == 0
&& a[12usize] == 0
&& a[13usize] == 0
&& a[14usize] == 0
&& a[15usize] == 0
}
#[inline(always)]
fn combine_mask8x16(self, a: mask8x16<Self>, b: mask8x16<Self>) -> mask8x32<Self> {
let mut result = [0; 32usize];
result[0..16usize].copy_from_slice(&a.val.0);
result[16usize..32usize].copy_from_slice(&b.val.0);
result.simd_into(self)
}
#[inline(always)]
fn splat_i16x8(self, val: i16) -> i16x8<Self> {
[val; 8usize].simd_into(self)
}
#[inline(always)]
fn load_array_i16x8(self, val: [i16; 8usize]) -> i16x8<Self> {
i16x8 {
val: crate::support::Aligned128(val),
simd: self,
}
}
#[inline(always)]
fn load_array_ref_i16x8(self, val: &[i16; 8usize]) -> i16x8<Self> {
i16x8 {
val: crate::support::Aligned128(*val),
simd: self,
}
}
#[inline(always)]
fn as_array_i16x8(self, a: i16x8<Self>) -> [i16; 8usize] {
a.val.0
}
#[inline(always)]
fn as_array_ref_i16x8(self, a: &i16x8<Self>) -> &[i16; 8usize] {
&a.val.0
}
#[inline(always)]
fn as_array_mut_i16x8(self, a: &mut i16x8<Self>) -> &mut [i16; 8usize] {
&mut a.val.0
}
#[inline(always)]
fn store_array_i16x8(self, a: i16x8<Self>, dest: &mut [i16; 8usize]) {
*dest = a.val.0;
}
#[inline(always)]
fn cvt_from_bytes_i16x8(self, a: u8x16<Self>) -> i16x8<Self> {
unsafe {
i16x8 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_i16x8(self, a: i16x8<Self>) -> u8x16<Self> {
unsafe {
u8x16 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn slide_i16x8<const SHIFT: usize>(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
let mut dest = [Default::default(); 8usize];
dest[..8usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
dest[8usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
dest.simd_into(self)
}
#[inline(always)]
fn slide_within_blocks_i16x8<const SHIFT: usize>(
self,
a: i16x8<Self>,
b: i16x8<Self>,
) -> i16x8<Self> {
self.slide_i16x8::<SHIFT>(a, b)
}
#[inline(always)]
fn add_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
[
i16::wrapping_add(a[0usize], b[0usize]),
i16::wrapping_add(a[1usize], b[1usize]),
i16::wrapping_add(a[2usize], b[2usize]),
i16::wrapping_add(a[3usize], b[3usize]),
i16::wrapping_add(a[4usize], b[4usize]),
i16::wrapping_add(a[5usize], b[5usize]),
i16::wrapping_add(a[6usize], b[6usize]),
i16::wrapping_add(a[7usize], b[7usize]),
]
.simd_into(self)
}
#[inline(always)]
fn sub_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
[
i16::wrapping_sub(a[0usize], b[0usize]),
i16::wrapping_sub(a[1usize], b[1usize]),
i16::wrapping_sub(a[2usize], b[2usize]),
i16::wrapping_sub(a[3usize], b[3usize]),
i16::wrapping_sub(a[4usize], b[4usize]),
i16::wrapping_sub(a[5usize], b[5usize]),
i16::wrapping_sub(a[6usize], b[6usize]),
i16::wrapping_sub(a[7usize], b[7usize]),
]
.simd_into(self)
}
#[inline(always)]
fn mul_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
[
i16::wrapping_mul(a[0usize], b[0usize]),
i16::wrapping_mul(a[1usize], b[1usize]),
i16::wrapping_mul(a[2usize], b[2usize]),
i16::wrapping_mul(a[3usize], b[3usize]),
i16::wrapping_mul(a[4usize], b[4usize]),
i16::wrapping_mul(a[5usize], b[5usize]),
i16::wrapping_mul(a[6usize], b[6usize]),
i16::wrapping_mul(a[7usize], b[7usize]),
]
.simd_into(self)
}
#[inline(always)]
fn and_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
[
i16::bitand(a[0usize], &b[0usize]),
i16::bitand(a[1usize], &b[1usize]),
i16::bitand(a[2usize], &b[2usize]),
i16::bitand(a[3usize], &b[3usize]),
i16::bitand(a[4usize], &b[4usize]),
i16::bitand(a[5usize], &b[5usize]),
i16::bitand(a[6usize], &b[6usize]),
i16::bitand(a[7usize], &b[7usize]),
]
.simd_into(self)
}
#[inline(always)]
fn or_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
[
i16::bitor(a[0usize], &b[0usize]),
i16::bitor(a[1usize], &b[1usize]),
i16::bitor(a[2usize], &b[2usize]),
i16::bitor(a[3usize], &b[3usize]),
i16::bitor(a[4usize], &b[4usize]),
i16::bitor(a[5usize], &b[5usize]),
i16::bitor(a[6usize], &b[6usize]),
i16::bitor(a[7usize], &b[7usize]),
]
.simd_into(self)
}
#[inline(always)]
fn xor_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
[
i16::bitxor(a[0usize], &b[0usize]),
i16::bitxor(a[1usize], &b[1usize]),
i16::bitxor(a[2usize], &b[2usize]),
i16::bitxor(a[3usize], &b[3usize]),
i16::bitxor(a[4usize], &b[4usize]),
i16::bitxor(a[5usize], &b[5usize]),
i16::bitxor(a[6usize], &b[6usize]),
i16::bitxor(a[7usize], &b[7usize]),
]
.simd_into(self)
}
#[inline(always)]
fn not_i16x8(self, a: i16x8<Self>) -> i16x8<Self> {
[
i16::not(a[0usize]),
i16::not(a[1usize]),
i16::not(a[2usize]),
i16::not(a[3usize]),
i16::not(a[4usize]),
i16::not(a[5usize]),
i16::not(a[6usize]),
i16::not(a[7usize]),
]
.simd_into(self)
}
#[inline(always)]
fn shl_i16x8(self, a: i16x8<Self>, shift: u32) -> i16x8<Self> {
[
i16::shl(a[0usize], shift),
i16::shl(a[1usize], shift),
i16::shl(a[2usize], shift),
i16::shl(a[3usize], shift),
i16::shl(a[4usize], shift),
i16::shl(a[5usize], shift),
i16::shl(a[6usize], shift),
i16::shl(a[7usize], shift),
]
.simd_into(self)
}
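// The `shlv`/`shrv` variants shift each lane of `a` by the matching lane
// of `b`. As with scalar `<<`/`>>`, a per-lane shift amount that reaches
// the bit width is an arithmetic overflow (a panic with debug assertions
// enabled).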
#[inline(always)]
fn shlv_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
[
i16::shl(a[0usize], &b[0usize]),
i16::shl(a[1usize], &b[1usize]),
i16::shl(a[2usize], &b[2usize]),
i16::shl(a[3usize], &b[3usize]),
i16::shl(a[4usize], &b[4usize]),
i16::shl(a[5usize], &b[5usize]),
i16::shl(a[6usize], &b[6usize]),
i16::shl(a[7usize], &b[7usize]),
]
.simd_into(self)
}
#[inline(always)]
fn shr_i16x8(self, a: i16x8<Self>, shift: u32) -> i16x8<Self> {
[
i16::shr(a[0usize], shift),
i16::shr(a[1usize], shift),
i16::shr(a[2usize], shift),
i16::shr(a[3usize], shift),
i16::shr(a[4usize], shift),
i16::shr(a[5usize], shift),
i16::shr(a[6usize], shift),
i16::shr(a[7usize], shift),
]
.simd_into(self)
}
#[inline(always)]
fn shrv_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
[
i16::shr(a[0usize], &b[0usize]),
i16::shr(a[1usize], &b[1usize]),
i16::shr(a[2usize], &b[2usize]),
i16::shr(a[3usize], &b[3usize]),
i16::shr(a[4usize], &b[4usize]),
i16::shr(a[5usize], &b[5usize]),
i16::shr(a[6usize], &b[6usize]),
i16::shr(a[7usize], &b[7usize]),
]
.simd_into(self)
}
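// Comparisons produce the fallback mask encoding: each lane is 0 for
// false and -1 (all bits set) for true, obtained by negating the `bool`
// cast. This matches the all-ones lane masks produced by hardware SIMD
// comparison instructions.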
#[inline(always)]
fn simd_eq_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> mask16x8<Self> {
[
-(i16::eq(&a[0usize], &b[0usize]) as i16),
-(i16::eq(&a[1usize], &b[1usize]) as i16),
-(i16::eq(&a[2usize], &b[2usize]) as i16),
-(i16::eq(&a[3usize], &b[3usize]) as i16),
-(i16::eq(&a[4usize], &b[4usize]) as i16),
-(i16::eq(&a[5usize], &b[5usize]) as i16),
-(i16::eq(&a[6usize], &b[6usize]) as i16),
-(i16::eq(&a[7usize], &b[7usize]) as i16),
]
.simd_into(self)
}
#[inline(always)]
fn simd_lt_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> mask16x8<Self> {
[
-(i16::lt(&a[0usize], &b[0usize]) as i16),
-(i16::lt(&a[1usize], &b[1usize]) as i16),
-(i16::lt(&a[2usize], &b[2usize]) as i16),
-(i16::lt(&a[3usize], &b[3usize]) as i16),
-(i16::lt(&a[4usize], &b[4usize]) as i16),
-(i16::lt(&a[5usize], &b[5usize]) as i16),
-(i16::lt(&a[6usize], &b[6usize]) as i16),
-(i16::lt(&a[7usize], &b[7usize]) as i16),
]
.simd_into(self)
}
#[inline(always)]
fn simd_le_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> mask16x8<Self> {
[
-(i16::le(&a[0usize], &b[0usize]) as i16),
-(i16::le(&a[1usize], &b[1usize]) as i16),
-(i16::le(&a[2usize], &b[2usize]) as i16),
-(i16::le(&a[3usize], &b[3usize]) as i16),
-(i16::le(&a[4usize], &b[4usize]) as i16),
-(i16::le(&a[5usize], &b[5usize]) as i16),
-(i16::le(&a[6usize], &b[6usize]) as i16),
-(i16::le(&a[7usize], &b[7usize]) as i16),
]
.simd_into(self)
}
#[inline(always)]
fn simd_ge_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> mask16x8<Self> {
[
-(i16::ge(&a[0usize], &b[0usize]) as i16),
-(i16::ge(&a[1usize], &b[1usize]) as i16),
-(i16::ge(&a[2usize], &b[2usize]) as i16),
-(i16::ge(&a[3usize], &b[3usize]) as i16),
-(i16::ge(&a[4usize], &b[4usize]) as i16),
-(i16::ge(&a[5usize], &b[5usize]) as i16),
-(i16::ge(&a[6usize], &b[6usize]) as i16),
-(i16::ge(&a[7usize], &b[7usize]) as i16),
]
.simd_into(self)
}
#[inline(always)]
fn simd_gt_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> mask16x8<Self> {
[
-(i16::gt(&a[0usize], &b[0usize]) as i16),
-(i16::gt(&a[1usize], &b[1usize]) as i16),
-(i16::gt(&a[2usize], &b[2usize]) as i16),
-(i16::gt(&a[3usize], &b[3usize]) as i16),
-(i16::gt(&a[4usize], &b[4usize]) as i16),
-(i16::gt(&a[5usize], &b[5usize]) as i16),
-(i16::gt(&a[6usize], &b[6usize]) as i16),
-(i16::gt(&a[7usize], &b[7usize]) as i16),
]
.simd_into(self)
}
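// `zip_low`/`zip_high` interleave the low (resp. high) halves of the two
// inputs: zip_low yields [a0, b0, a1, b1, ...]. `unzip_low`/`unzip_high`
// invert this, gathering the even (resp. odd) lanes of `a` then of `b`.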
#[inline(always)]
fn zip_low_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
[
a[0usize], b[0usize], a[1usize], b[1usize], a[2usize], b[2usize], a[3usize], b[3usize],
]
.simd_into(self)
}
#[inline(always)]
fn zip_high_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
[
a[4usize], b[4usize], a[5usize], b[5usize], a[6usize], b[6usize], a[7usize], b[7usize],
]
.simd_into(self)
}
#[inline(always)]
fn unzip_low_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
[
a[0usize], a[2usize], a[4usize], a[6usize], b[0usize], b[2usize], b[4usize], b[6usize],
]
.simd_into(self)
}
#[inline(always)]
fn unzip_high_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
[
a[1usize], a[3usize], a[5usize], a[7usize], b[1usize], b[3usize], b[5usize], b[7usize],
]
.simd_into(self)
}
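// Lane-wise select: a nonzero mask lane picks from `b`, a zero lane picks
// from `c`. Canonical masks are 0 or -1, but any nonzero value counts as
// true here.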
#[inline(always)]
fn select_i16x8(self, a: mask16x8<Self>, b: i16x8<Self>, c: i16x8<Self>) -> i16x8<Self> {
[
if a[0usize] != 0 { b[0usize] } else { c[0usize] },
if a[1usize] != 0 { b[1usize] } else { c[1usize] },
if a[2usize] != 0 { b[2usize] } else { c[2usize] },
if a[3usize] != 0 { b[3usize] } else { c[3usize] },
if a[4usize] != 0 { b[4usize] } else { c[4usize] },
if a[5usize] != 0 { b[5usize] } else { c[5usize] },
if a[6usize] != 0 { b[6usize] } else { c[6usize] },
if a[7usize] != 0 { b[7usize] } else { c[7usize] },
]
.simd_into(self)
}
#[inline(always)]
fn min_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
[
i16::min(a[0usize], b[0usize]),
i16::min(a[1usize], b[1usize]),
i16::min(a[2usize], b[2usize]),
i16::min(a[3usize], b[3usize]),
i16::min(a[4usize], b[4usize]),
i16::min(a[5usize], b[5usize]),
i16::min(a[6usize], b[6usize]),
i16::min(a[7usize], b[7usize]),
]
.simd_into(self)
}
#[inline(always)]
fn max_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
[
i16::max(a[0usize], b[0usize]),
i16::max(a[1usize], b[1usize]),
i16::max(a[2usize], b[2usize]),
i16::max(a[3usize], b[3usize]),
i16::max(a[4usize], b[4usize]),
i16::max(a[5usize], b[5usize]),
i16::max(a[6usize], b[6usize]),
i16::max(a[7usize], b[7usize]),
]
.simd_into(self)
}
#[inline(always)]
fn combine_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x16<Self> {
let mut result = [0; 16usize];
result[0..8usize].copy_from_slice(&a.val.0);
result[8usize..16usize].copy_from_slice(&b.val.0);
result.simd_into(self)
}
#[inline(always)]
fn neg_i16x8(self, a: i16x8<Self>) -> i16x8<Self> {
[
i16::neg(a[0usize]),
i16::neg(a[1usize]),
i16::neg(a[2usize]),
i16::neg(a[3usize]),
i16::neg(a[4usize]),
i16::neg(a[5usize]),
i16::neg(a[6usize]),
i16::neg(a[7usize]),
]
.simd_into(self)
}
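// `reinterpret` is a size-preserving bitcast of the 128-bit storage; no
// lane values are converted.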
#[inline(always)]
fn reinterpret_u8_i16x8(self, a: i16x8<Self>) -> u8x16<Self> {
a.bitcast()
}
#[inline(always)]
fn reinterpret_u32_i16x8(self, a: i16x8<Self>) -> u32x4<Self> {
a.bitcast()
}
#[inline(always)]
fn splat_u16x8(self, val: u16) -> u16x8<Self> {
[val; 8usize].simd_into(self)
}
#[inline(always)]
fn load_array_u16x8(self, val: [u16; 8usize]) -> u16x8<Self> {
u16x8 {
val: crate::support::Aligned128(val),
simd: self,
}
}
#[inline(always)]
fn load_array_ref_u16x8(self, val: &[u16; 8usize]) -> u16x8<Self> {
u16x8 {
val: crate::support::Aligned128(*val),
simd: self,
}
}
#[inline(always)]
fn as_array_u16x8(self, a: u16x8<Self>) -> [u16; 8usize] {
a.val.0
}
#[inline(always)]
fn as_array_ref_u16x8(self, a: &u16x8<Self>) -> &[u16; 8usize] {
&a.val.0
}
#[inline(always)]
fn as_array_mut_u16x8(self, a: &mut u16x8<Self>) -> &mut [u16; 8usize] {
&mut a.val.0
}
#[inline(always)]
fn store_array_u16x8(self, a: u16x8<Self>, dest: &mut [u16; 8usize]) -> () {
*dest = a.val.0;
}
#[inline(always)]
fn cvt_from_bytes_u16x8(self, a: u8x16<Self>) -> u16x8<Self> {
unsafe {
u16x8 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_u16x8(self, a: u16x8<Self>) -> u8x16<Self> {
unsafe {
u8x16 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn slide_u16x8<const SHIFT: usize>(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
let mut dest = [Default::default(); 8usize];
dest[..8usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
dest[8usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
dest.simd_into(self)
}
#[inline(always)]
fn slide_within_blocks_u16x8<const SHIFT: usize>(
self,
a: u16x8<Self>,
b: u16x8<Self>,
) -> u16x8<Self> {
self.slide_u16x8::<SHIFT>(a, b)
}
#[inline(always)]
fn add_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
[
u16::wrapping_add(a[0usize], b[0usize]),
u16::wrapping_add(a[1usize], b[1usize]),
u16::wrapping_add(a[2usize], b[2usize]),
u16::wrapping_add(a[3usize], b[3usize]),
u16::wrapping_add(a[4usize], b[4usize]),
u16::wrapping_add(a[5usize], b[5usize]),
u16::wrapping_add(a[6usize], b[6usize]),
u16::wrapping_add(a[7usize], b[7usize]),
]
.simd_into(self)
}
#[inline(always)]
fn sub_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
[
u16::wrapping_sub(a[0usize], b[0usize]),
u16::wrapping_sub(a[1usize], b[1usize]),
u16::wrapping_sub(a[2usize], b[2usize]),
u16::wrapping_sub(a[3usize], b[3usize]),
u16::wrapping_sub(a[4usize], b[4usize]),
u16::wrapping_sub(a[5usize], b[5usize]),
u16::wrapping_sub(a[6usize], b[6usize]),
u16::wrapping_sub(a[7usize], b[7usize]),
]
.simd_into(self)
}
#[inline(always)]
fn mul_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
[
u16::wrapping_mul(a[0usize], b[0usize]),
u16::wrapping_mul(a[1usize], b[1usize]),
u16::wrapping_mul(a[2usize], b[2usize]),
u16::wrapping_mul(a[3usize], b[3usize]),
u16::wrapping_mul(a[4usize], b[4usize]),
u16::wrapping_mul(a[5usize], b[5usize]),
u16::wrapping_mul(a[6usize], b[6usize]),
u16::wrapping_mul(a[7usize], b[7usize]),
]
.simd_into(self)
}
#[inline(always)]
fn and_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
[
u16::bitand(a[0usize], &b[0usize]),
u16::bitand(a[1usize], &b[1usize]),
u16::bitand(a[2usize], &b[2usize]),
u16::bitand(a[3usize], &b[3usize]),
u16::bitand(a[4usize], &b[4usize]),
u16::bitand(a[5usize], &b[5usize]),
u16::bitand(a[6usize], &b[6usize]),
u16::bitand(a[7usize], &b[7usize]),
]
.simd_into(self)
}
#[inline(always)]
fn or_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
[
u16::bitor(a[0usize], &b[0usize]),
u16::bitor(a[1usize], &b[1usize]),
u16::bitor(a[2usize], &b[2usize]),
u16::bitor(a[3usize], &b[3usize]),
u16::bitor(a[4usize], &b[4usize]),
u16::bitor(a[5usize], &b[5usize]),
u16::bitor(a[6usize], &b[6usize]),
u16::bitor(a[7usize], &b[7usize]),
]
.simd_into(self)
}
#[inline(always)]
fn xor_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
[
u16::bitxor(a[0usize], &b[0usize]),
u16::bitxor(a[1usize], &b[1usize]),
u16::bitxor(a[2usize], &b[2usize]),
u16::bitxor(a[3usize], &b[3usize]),
u16::bitxor(a[4usize], &b[4usize]),
u16::bitxor(a[5usize], &b[5usize]),
u16::bitxor(a[6usize], &b[6usize]),
u16::bitxor(a[7usize], &b[7usize]),
]
.simd_into(self)
}
#[inline(always)]
fn not_u16x8(self, a: u16x8<Self>) -> u16x8<Self> {
[
u16::not(a[0usize]),
u16::not(a[1usize]),
u16::not(a[2usize]),
u16::not(a[3usize]),
u16::not(a[4usize]),
u16::not(a[5usize]),
u16::not(a[6usize]),
u16::not(a[7usize]),
]
.simd_into(self)
}
#[inline(always)]
fn shl_u16x8(self, a: u16x8<Self>, shift: u32) -> u16x8<Self> {
[
u16::shl(a[0usize], shift),
u16::shl(a[1usize], shift),
u16::shl(a[2usize], shift),
u16::shl(a[3usize], shift),
u16::shl(a[4usize], shift),
u16::shl(a[5usize], shift),
u16::shl(a[6usize], shift),
u16::shl(a[7usize], shift),
]
.simd_into(self)
}
#[inline(always)]
fn shlv_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
[
u16::shl(a[0usize], &b[0usize]),
u16::shl(a[1usize], &b[1usize]),
u16::shl(a[2usize], &b[2usize]),
u16::shl(a[3usize], &b[3usize]),
u16::shl(a[4usize], &b[4usize]),
u16::shl(a[5usize], &b[5usize]),
u16::shl(a[6usize], &b[6usize]),
u16::shl(a[7usize], &b[7usize]),
]
.simd_into(self)
}
#[inline(always)]
fn shr_u16x8(self, a: u16x8<Self>, shift: u32) -> u16x8<Self> {
[
u16::shr(a[0usize], shift),
u16::shr(a[1usize], shift),
u16::shr(a[2usize], shift),
u16::shr(a[3usize], shift),
u16::shr(a[4usize], shift),
u16::shr(a[5usize], shift),
u16::shr(a[6usize], shift),
u16::shr(a[7usize], shift),
]
.simd_into(self)
}
#[inline(always)]
fn shrv_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
[
u16::shr(a[0usize], &b[0usize]),
u16::shr(a[1usize], &b[1usize]),
u16::shr(a[2usize], &b[2usize]),
u16::shr(a[3usize], &b[3usize]),
u16::shr(a[4usize], &b[4usize]),
u16::shr(a[5usize], &b[5usize]),
u16::shr(a[6usize], &b[6usize]),
u16::shr(a[7usize], &b[7usize]),
]
.simd_into(self)
}
#[inline(always)]
fn simd_eq_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> mask16x8<Self> {
[
-(u16::eq(&a[0usize], &b[0usize]) as i16),
-(u16::eq(&a[1usize], &b[1usize]) as i16),
-(u16::eq(&a[2usize], &b[2usize]) as i16),
-(u16::eq(&a[3usize], &b[3usize]) as i16),
-(u16::eq(&a[4usize], &b[4usize]) as i16),
-(u16::eq(&a[5usize], &b[5usize]) as i16),
-(u16::eq(&a[6usize], &b[6usize]) as i16),
-(u16::eq(&a[7usize], &b[7usize]) as i16),
]
.simd_into(self)
}
#[inline(always)]
fn simd_lt_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> mask16x8<Self> {
[
-(u16::lt(&a[0usize], &b[0usize]) as i16),
-(u16::lt(&a[1usize], &b[1usize]) as i16),
-(u16::lt(&a[2usize], &b[2usize]) as i16),
-(u16::lt(&a[3usize], &b[3usize]) as i16),
-(u16::lt(&a[4usize], &b[4usize]) as i16),
-(u16::lt(&a[5usize], &b[5usize]) as i16),
-(u16::lt(&a[6usize], &b[6usize]) as i16),
-(u16::lt(&a[7usize], &b[7usize]) as i16),
]
.simd_into(self)
}
#[inline(always)]
fn simd_le_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> mask16x8<Self> {
[
-(u16::le(&a[0usize], &b[0usize]) as i16),
-(u16::le(&a[1usize], &b[1usize]) as i16),
-(u16::le(&a[2usize], &b[2usize]) as i16),
-(u16::le(&a[3usize], &b[3usize]) as i16),
-(u16::le(&a[4usize], &b[4usize]) as i16),
-(u16::le(&a[5usize], &b[5usize]) as i16),
-(u16::le(&a[6usize], &b[6usize]) as i16),
-(u16::le(&a[7usize], &b[7usize]) as i16),
]
.simd_into(self)
}
#[inline(always)]
fn simd_ge_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> mask16x8<Self> {
[
-(u16::ge(&a[0usize], &b[0usize]) as i16),
-(u16::ge(&a[1usize], &b[1usize]) as i16),
-(u16::ge(&a[2usize], &b[2usize]) as i16),
-(u16::ge(&a[3usize], &b[3usize]) as i16),
-(u16::ge(&a[4usize], &b[4usize]) as i16),
-(u16::ge(&a[5usize], &b[5usize]) as i16),
-(u16::ge(&a[6usize], &b[6usize]) as i16),
-(u16::ge(&a[7usize], &b[7usize]) as i16),
]
.simd_into(self)
}
#[inline(always)]
fn simd_gt_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> mask16x8<Self> {
[
-(u16::gt(&a[0usize], &b[0usize]) as i16),
-(u16::gt(&a[1usize], &b[1usize]) as i16),
-(u16::gt(&a[2usize], &b[2usize]) as i16),
-(u16::gt(&a[3usize], &b[3usize]) as i16),
-(u16::gt(&a[4usize], &b[4usize]) as i16),
-(u16::gt(&a[5usize], &b[5usize]) as i16),
-(u16::gt(&a[6usize], &b[6usize]) as i16),
-(u16::gt(&a[7usize], &b[7usize]) as i16),
]
.simd_into(self)
}
#[inline(always)]
fn zip_low_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
[
a[0usize], b[0usize], a[1usize], b[1usize], a[2usize], b[2usize], a[3usize], b[3usize],
]
.simd_into(self)
}
#[inline(always)]
fn zip_high_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
[
a[4usize], b[4usize], a[5usize], b[5usize], a[6usize], b[6usize], a[7usize], b[7usize],
]
.simd_into(self)
}
#[inline(always)]
fn unzip_low_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
[
a[0usize], a[2usize], a[4usize], a[6usize], b[0usize], b[2usize], b[4usize], b[6usize],
]
.simd_into(self)
}
#[inline(always)]
fn unzip_high_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
[
a[1usize], a[3usize], a[5usize], a[7usize], b[1usize], b[3usize], b[5usize], b[7usize],
]
.simd_into(self)
}
#[inline(always)]
fn select_u16x8(self, a: mask16x8<Self>, b: u16x8<Self>, c: u16x8<Self>) -> u16x8<Self> {
[
if a[0usize] != 0 { b[0usize] } else { c[0usize] },
if a[1usize] != 0 { b[1usize] } else { c[1usize] },
if a[2usize] != 0 { b[2usize] } else { c[2usize] },
if a[3usize] != 0 { b[3usize] } else { c[3usize] },
if a[4usize] != 0 { b[4usize] } else { c[4usize] },
if a[5usize] != 0 { b[5usize] } else { c[5usize] },
if a[6usize] != 0 { b[6usize] } else { c[6usize] },
if a[7usize] != 0 { b[7usize] } else { c[7usize] },
]
.simd_into(self)
}
#[inline(always)]
fn min_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
[
u16::min(a[0usize], b[0usize]),
u16::min(a[1usize], b[1usize]),
u16::min(a[2usize], b[2usize]),
u16::min(a[3usize], b[3usize]),
u16::min(a[4usize], b[4usize]),
u16::min(a[5usize], b[5usize]),
u16::min(a[6usize], b[6usize]),
u16::min(a[7usize], b[7usize]),
]
.simd_into(self)
}
#[inline(always)]
fn max_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
[
u16::max(a[0usize], b[0usize]),
u16::max(a[1usize], b[1usize]),
u16::max(a[2usize], b[2usize]),
u16::max(a[3usize], b[3usize]),
u16::max(a[4usize], b[4usize]),
u16::max(a[5usize], b[5usize]),
u16::max(a[6usize], b[6usize]),
u16::max(a[7usize], b[7usize]),
]
.simd_into(self)
}
#[inline(always)]
fn combine_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x16<Self> {
let mut result = [0; 16usize];
result[0..8usize].copy_from_slice(&a.val.0);
result[8usize..16usize].copy_from_slice(&b.val.0);
result.simd_into(self)
}
#[inline(always)]
fn reinterpret_u8_u16x8(self, a: u16x8<Self>) -> u8x16<Self> {
a.bitcast()
}
#[inline(always)]
fn reinterpret_u32_u16x8(self, a: u16x8<Self>) -> u32x4<Self> {
a.bitcast()
}
#[inline(always)]
fn splat_mask16x8(self, val: i16) -> mask16x8<Self> {
[val; 8usize].simd_into(self)
}
#[inline(always)]
fn load_array_mask16x8(self, val: [i16; 8usize]) -> mask16x8<Self> {
mask16x8 {
val: crate::support::Aligned128(val),
simd: self,
}
}
#[inline(always)]
fn load_array_ref_mask16x8(self, val: &[i16; 8usize]) -> mask16x8<Self> {
mask16x8 {
val: crate::support::Aligned128(*val),
simd: self,
}
}
#[inline(always)]
fn as_array_mask16x8(self, a: mask16x8<Self>) -> [i16; 8usize] {
a.val.0
}
#[inline(always)]
fn as_array_ref_mask16x8(self, a: &mask16x8<Self>) -> &[i16; 8usize] {
&a.val.0
}
#[inline(always)]
fn as_array_mut_mask16x8(self, a: &mut mask16x8<Self>) -> &mut [i16; 8usize] {
&mut a.val.0
}
#[inline(always)]
fn store_array_mask16x8(self, a: mask16x8<Self>, dest: &mut [i16; 8usize]) -> () {
*dest = a.val.0;
}
#[inline(always)]
fn cvt_from_bytes_mask16x8(self, a: u8x16<Self>) -> mask16x8<Self> {
unsafe {
mask16x8 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_mask16x8(self, a: mask16x8<Self>) -> u8x16<Self> {
unsafe {
u8x16 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn slide_mask16x8<const SHIFT: usize>(
self,
a: mask16x8<Self>,
b: mask16x8<Self>,
) -> mask16x8<Self> {
let mut dest = [Default::default(); 8usize];
dest[..8usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
dest[8usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
dest.simd_into(self)
}
#[inline(always)]
fn slide_within_blocks_mask16x8<const SHIFT: usize>(
self,
a: mask16x8<Self>,
b: mask16x8<Self>,
) -> mask16x8<Self> {
self.slide_mask16x8::<SHIFT>(a, b)
}
#[inline(always)]
fn and_mask16x8(self, a: mask16x8<Self>, b: mask16x8<Self>) -> mask16x8<Self> {
[
i16::bitand(a[0usize], &b[0usize]),
i16::bitand(a[1usize], &b[1usize]),
i16::bitand(a[2usize], &b[2usize]),
i16::bitand(a[3usize], &b[3usize]),
i16::bitand(a[4usize], &b[4usize]),
i16::bitand(a[5usize], &b[5usize]),
i16::bitand(a[6usize], &b[6usize]),
i16::bitand(a[7usize], &b[7usize]),
]
.simd_into(self)
}
#[inline(always)]
fn or_mask16x8(self, a: mask16x8<Self>, b: mask16x8<Self>) -> mask16x8<Self> {
[
i16::bitor(a[0usize], &b[0usize]),
i16::bitor(a[1usize], &b[1usize]),
i16::bitor(a[2usize], &b[2usize]),
i16::bitor(a[3usize], &b[3usize]),
i16::bitor(a[4usize], &b[4usize]),
i16::bitor(a[5usize], &b[5usize]),
i16::bitor(a[6usize], &b[6usize]),
i16::bitor(a[7usize], &b[7usize]),
]
.simd_into(self)
}
#[inline(always)]
fn xor_mask16x8(self, a: mask16x8<Self>, b: mask16x8<Self>) -> mask16x8<Self> {
[
i16::bitxor(a[0usize], &b[0usize]),
i16::bitxor(a[1usize], &b[1usize]),
i16::bitxor(a[2usize], &b[2usize]),
i16::bitxor(a[3usize], &b[3usize]),
i16::bitxor(a[4usize], &b[4usize]),
i16::bitxor(a[5usize], &b[5usize]),
i16::bitxor(a[6usize], &b[6usize]),
i16::bitxor(a[7usize], &b[7usize]),
]
.simd_into(self)
}
#[inline(always)]
fn not_mask16x8(self, a: mask16x8<Self>) -> mask16x8<Self> {
[
i16::not(a[0usize]),
i16::not(a[1usize]),
i16::not(a[2usize]),
i16::not(a[3usize]),
i16::not(a[4usize]),
i16::not(a[5usize]),
i16::not(a[6usize]),
i16::not(a[7usize]),
]
.simd_into(self)
}
#[inline(always)]
fn select_mask16x8(
self,
a: mask16x8<Self>,
b: mask16x8<Self>,
c: mask16x8<Self>,
) -> mask16x8<Self> {
[
if a[0usize] != 0 { b[0usize] } else { c[0usize] },
if a[1usize] != 0 { b[1usize] } else { c[1usize] },
if a[2usize] != 0 { b[2usize] } else { c[2usize] },
if a[3usize] != 0 { b[3usize] } else { c[3usize] },
if a[4usize] != 0 { b[4usize] } else { c[4usize] },
if a[5usize] != 0 { b[5usize] } else { c[5usize] },
if a[6usize] != 0 { b[6usize] } else { c[6usize] },
if a[7usize] != 0 { b[7usize] } else { c[7usize] },
]
.simd_into(self)
}
#[inline(always)]
fn simd_eq_mask16x8(self, a: mask16x8<Self>, b: mask16x8<Self>) -> mask16x8<Self> {
[
-(i16::eq(&a[0usize], &b[0usize]) as i16),
-(i16::eq(&a[1usize], &b[1usize]) as i16),
-(i16::eq(&a[2usize], &b[2usize]) as i16),
-(i16::eq(&a[3usize], &b[3usize]) as i16),
-(i16::eq(&a[4usize], &b[4usize]) as i16),
-(i16::eq(&a[5usize], &b[5usize]) as i16),
-(i16::eq(&a[6usize], &b[6usize]) as i16),
-(i16::eq(&a[7usize], &b[7usize]) as i16),
]
.simd_into(self)
}
#[inline(always)]
fn any_true_mask16x8(self, a: mask16x8<Self>) -> bool {
a[0usize] != 0
|| a[1usize] != 0
|| a[2usize] != 0
|| a[3usize] != 0
|| a[4usize] != 0
|| a[5usize] != 0
|| a[6usize] != 0
|| a[7usize] != 0
}
#[inline(always)]
fn all_true_mask16x8(self, a: mask16x8<Self>) -> bool {
a[0usize] != 0
&& a[1usize] != 0
&& a[2usize] != 0
&& a[3usize] != 0
&& a[4usize] != 0
&& a[5usize] != 0
&& a[6usize] != 0
&& a[7usize] != 0
}
#[inline(always)]
fn any_false_mask16x8(self, a: mask16x8<Self>) -> bool {
a[0usize] == 0
|| a[1usize] == 0
|| a[2usize] == 0
|| a[3usize] == 0
|| a[4usize] == 0
|| a[5usize] == 0
|| a[6usize] == 0
|| a[7usize] == 0
}
#[inline(always)]
fn all_false_mask16x8(self, a: mask16x8<Self>) -> bool {
a[0usize] == 0
&& a[1usize] == 0
&& a[2usize] == 0
&& a[3usize] == 0
&& a[4usize] == 0
&& a[5usize] == 0
&& a[6usize] == 0
&& a[7usize] == 0
}
#[inline(always)]
fn combine_mask16x8(self, a: mask16x8<Self>, b: mask16x8<Self>) -> mask16x16<Self> {
let mut result = [0; 16usize];
result[0..8usize].copy_from_slice(&a.val.0);
result[8usize..16usize].copy_from_slice(&b.val.0);
result.simd_into(self)
}
#[inline(always)]
fn splat_i32x4(self, val: i32) -> i32x4<Self> {
[val; 4usize].simd_into(self)
}
#[inline(always)]
fn load_array_i32x4(self, val: [i32; 4usize]) -> i32x4<Self> {
i32x4 {
val: crate::support::Aligned128(val),
simd: self,
}
}
#[inline(always)]
fn load_array_ref_i32x4(self, val: &[i32; 4usize]) -> i32x4<Self> {
i32x4 {
val: crate::support::Aligned128(*val),
simd: self,
}
}
#[inline(always)]
fn as_array_i32x4(self, a: i32x4<Self>) -> [i32; 4usize] {
a.val.0
}
#[inline(always)]
fn as_array_ref_i32x4(self, a: &i32x4<Self>) -> &[i32; 4usize] {
&a.val.0
}
#[inline(always)]
fn as_array_mut_i32x4(self, a: &mut i32x4<Self>) -> &mut [i32; 4usize] {
&mut a.val.0
}
#[inline(always)]
fn store_array_i32x4(self, a: i32x4<Self>, dest: &mut [i32; 4usize]) -> () {
*dest = a.val.0;
}
#[inline(always)]
fn cvt_from_bytes_i32x4(self, a: u8x16<Self>) -> i32x4<Self> {
unsafe {
i32x4 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_i32x4(self, a: i32x4<Self>) -> u8x16<Self> {
unsafe {
u8x16 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn slide_i32x4<const SHIFT: usize>(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
let mut dest = [Default::default(); 4usize];
dest[..4usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
dest[4usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
dest.simd_into(self)
}
#[inline(always)]
fn slide_within_blocks_i32x4<const SHIFT: usize>(
self,
a: i32x4<Self>,
b: i32x4<Self>,
) -> i32x4<Self> {
self.slide_i32x4::<SHIFT>(a, b)
}
#[inline(always)]
fn add_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
[
i32::wrapping_add(a[0usize], b[0usize]),
i32::wrapping_add(a[1usize], b[1usize]),
i32::wrapping_add(a[2usize], b[2usize]),
i32::wrapping_add(a[3usize], b[3usize]),
]
.simd_into(self)
}
#[inline(always)]
fn sub_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
[
i32::wrapping_sub(a[0usize], b[0usize]),
i32::wrapping_sub(a[1usize], b[1usize]),
i32::wrapping_sub(a[2usize], b[2usize]),
i32::wrapping_sub(a[3usize], b[3usize]),
]
.simd_into(self)
}
#[inline(always)]
fn mul_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
[
i32::wrapping_mul(a[0usize], b[0usize]),
i32::wrapping_mul(a[1usize], b[1usize]),
i32::wrapping_mul(a[2usize], b[2usize]),
i32::wrapping_mul(a[3usize], b[3usize]),
]
.simd_into(self)
}
#[inline(always)]
fn and_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
[
i32::bitand(a[0usize], &b[0usize]),
i32::bitand(a[1usize], &b[1usize]),
i32::bitand(a[2usize], &b[2usize]),
i32::bitand(a[3usize], &b[3usize]),
]
.simd_into(self)
}
#[inline(always)]
fn or_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
[
i32::bitor(a[0usize], &b[0usize]),
i32::bitor(a[1usize], &b[1usize]),
i32::bitor(a[2usize], &b[2usize]),
i32::bitor(a[3usize], &b[3usize]),
]
.simd_into(self)
}
#[inline(always)]
fn xor_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
[
i32::bitxor(a[0usize], &b[0usize]),
i32::bitxor(a[1usize], &b[1usize]),
i32::bitxor(a[2usize], &b[2usize]),
i32::bitxor(a[3usize], &b[3usize]),
]
.simd_into(self)
}
#[inline(always)]
fn not_i32x4(self, a: i32x4<Self>) -> i32x4<Self> {
[
i32::not(a[0usize]),
i32::not(a[1usize]),
i32::not(a[2usize]),
i32::not(a[3usize]),
]
.simd_into(self)
}
#[inline(always)]
fn shl_i32x4(self, a: i32x4<Self>, shift: u32) -> i32x4<Self> {
[
i32::shl(a[0usize], shift),
i32::shl(a[1usize], shift),
i32::shl(a[2usize], shift),
i32::shl(a[3usize], shift),
]
.simd_into(self)
}
#[inline(always)]
fn shlv_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
[
i32::shl(a[0usize], &b[0usize]),
i32::shl(a[1usize], &b[1usize]),
i32::shl(a[2usize], &b[2usize]),
i32::shl(a[3usize], &b[3usize]),
]
.simd_into(self)
}
#[inline(always)]
fn shr_i32x4(self, a: i32x4<Self>, shift: u32) -> i32x4<Self> {
[
i32::shr(a[0usize], shift),
i32::shr(a[1usize], shift),
i32::shr(a[2usize], shift),
i32::shr(a[3usize], shift),
]
.simd_into(self)
}
#[inline(always)]
fn shrv_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
[
i32::shr(a[0usize], &b[0usize]),
i32::shr(a[1usize], &b[1usize]),
i32::shr(a[2usize], &b[2usize]),
i32::shr(a[3usize], &b[3usize]),
]
.simd_into(self)
}
#[inline(always)]
fn simd_eq_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> mask32x4<Self> {
[
-(i32::eq(&a[0usize], &b[0usize]) as i32),
-(i32::eq(&a[1usize], &b[1usize]) as i32),
-(i32::eq(&a[2usize], &b[2usize]) as i32),
-(i32::eq(&a[3usize], &b[3usize]) as i32),
]
.simd_into(self)
}
#[inline(always)]
fn simd_lt_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> mask32x4<Self> {
[
-(i32::lt(&a[0usize], &b[0usize]) as i32),
-(i32::lt(&a[1usize], &b[1usize]) as i32),
-(i32::lt(&a[2usize], &b[2usize]) as i32),
-(i32::lt(&a[3usize], &b[3usize]) as i32),
]
.simd_into(self)
}
#[inline(always)]
fn simd_le_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> mask32x4<Self> {
[
-(i32::le(&a[0usize], &b[0usize]) as i32),
-(i32::le(&a[1usize], &b[1usize]) as i32),
-(i32::le(&a[2usize], &b[2usize]) as i32),
-(i32::le(&a[3usize], &b[3usize]) as i32),
]
.simd_into(self)
}
#[inline(always)]
fn simd_ge_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> mask32x4<Self> {
[
-(i32::ge(&a[0usize], &b[0usize]) as i32),
-(i32::ge(&a[1usize], &b[1usize]) as i32),
-(i32::ge(&a[2usize], &b[2usize]) as i32),
-(i32::ge(&a[3usize], &b[3usize]) as i32),
]
.simd_into(self)
}
#[inline(always)]
fn simd_gt_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> mask32x4<Self> {
[
-(i32::gt(&a[0usize], &b[0usize]) as i32),
-(i32::gt(&a[1usize], &b[1usize]) as i32),
-(i32::gt(&a[2usize], &b[2usize]) as i32),
-(i32::gt(&a[3usize], &b[3usize]) as i32),
]
.simd_into(self)
}
#[inline(always)]
fn zip_low_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
[a[0usize], b[0usize], a[1usize], b[1usize]].simd_into(self)
}
#[inline(always)]
fn zip_high_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
[a[2usize], b[2usize], a[3usize], b[3usize]].simd_into(self)
}
#[inline(always)]
fn unzip_low_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
[a[0usize], a[2usize], b[0usize], b[2usize]].simd_into(self)
}
#[inline(always)]
fn unzip_high_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
[a[1usize], a[3usize], b[1usize], b[3usize]].simd_into(self)
}
#[inline(always)]
fn select_i32x4(self, a: mask32x4<Self>, b: i32x4<Self>, c: i32x4<Self>) -> i32x4<Self> {
[
if a[0usize] != 0 { b[0usize] } else { c[0usize] },
if a[1usize] != 0 { b[1usize] } else { c[1usize] },
if a[2usize] != 0 { b[2usize] } else { c[2usize] },
if a[3usize] != 0 { b[3usize] } else { c[3usize] },
]
.simd_into(self)
}
#[inline(always)]
fn min_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
[
i32::min(a[0usize], b[0usize]),
i32::min(a[1usize], b[1usize]),
i32::min(a[2usize], b[2usize]),
i32::min(a[3usize], b[3usize]),
]
.simd_into(self)
}
#[inline(always)]
fn max_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
[
i32::max(a[0usize], b[0usize]),
i32::max(a[1usize], b[1usize]),
i32::max(a[2usize], b[2usize]),
i32::max(a[3usize], b[3usize]),
]
.simd_into(self)
}
#[inline(always)]
fn combine_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x8<Self> {
let mut result = [0; 8usize];
result[0..4usize].copy_from_slice(&a.val.0);
result[4usize..8usize].copy_from_slice(&b.val.0);
result.simd_into(self)
}
#[inline(always)]
fn neg_i32x4(self, a: i32x4<Self>) -> i32x4<Self> {
[
i32::neg(a[0usize]),
i32::neg(a[1usize]),
i32::neg(a[2usize]),
i32::neg(a[3usize]),
]
.simd_into(self)
}
#[inline(always)]
fn reinterpret_u8_i32x4(self, a: i32x4<Self>) -> u8x16<Self> {
a.bitcast()
}
#[inline(always)]
fn reinterpret_u32_i32x4(self, a: i32x4<Self>) -> u32x4<Self> {
a.bitcast()
}
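// Numeric conversion, in contrast to `reinterpret`: each i32 lane is
// converted with `as f32`, which rounds to the nearest representable
// value once magnitudes exceed 2^24.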
#[inline(always)]
fn cvt_f32_i32x4(self, a: i32x4<Self>) -> f32x4<Self> {
[
a[0usize] as f32,
a[1usize] as f32,
a[2usize] as f32,
a[3usize] as f32,
]
.simd_into(self)
}
#[inline(always)]
fn splat_u32x4(self, val: u32) -> u32x4<Self> {
[val; 4usize].simd_into(self)
}
#[inline(always)]
fn load_array_u32x4(self, val: [u32; 4usize]) -> u32x4<Self> {
u32x4 {
val: crate::support::Aligned128(val),
simd: self,
}
}
#[inline(always)]
fn load_array_ref_u32x4(self, val: &[u32; 4usize]) -> u32x4<Self> {
u32x4 {
val: crate::support::Aligned128(*val),
simd: self,
}
}
#[inline(always)]
fn as_array_u32x4(self, a: u32x4<Self>) -> [u32; 4usize] {
a.val.0
}
#[inline(always)]
fn as_array_ref_u32x4(self, a: &u32x4<Self>) -> &[u32; 4usize] {
&a.val.0
}
#[inline(always)]
fn as_array_mut_u32x4(self, a: &mut u32x4<Self>) -> &mut [u32; 4usize] {
&mut a.val.0
}
#[inline(always)]
fn store_array_u32x4(self, a: u32x4<Self>, dest: &mut [u32; 4usize]) -> () {
*dest = a.val.0;
}
#[inline(always)]
fn cvt_from_bytes_u32x4(self, a: u8x16<Self>) -> u32x4<Self> {
unsafe {
u32x4 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_u32x4(self, a: u32x4<Self>) -> u8x16<Self> {
unsafe {
u8x16 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn slide_u32x4<const SHIFT: usize>(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
let mut dest = [Default::default(); 4usize];
dest[..4usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
dest[4usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
dest.simd_into(self)
}
#[inline(always)]
fn slide_within_blocks_u32x4<const SHIFT: usize>(
self,
a: u32x4<Self>,
b: u32x4<Self>,
) -> u32x4<Self> {
self.slide_u32x4::<SHIFT>(a, b)
}
#[inline(always)]
fn add_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
[
u32::wrapping_add(a[0usize], b[0usize]),
u32::wrapping_add(a[1usize], b[1usize]),
u32::wrapping_add(a[2usize], b[2usize]),
u32::wrapping_add(a[3usize], b[3usize]),
]
.simd_into(self)
}
#[inline(always)]
fn sub_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
[
u32::wrapping_sub(a[0usize], b[0usize]),
u32::wrapping_sub(a[1usize], b[1usize]),
u32::wrapping_sub(a[2usize], b[2usize]),
u32::wrapping_sub(a[3usize], b[3usize]),
]
.simd_into(self)
}
#[inline(always)]
fn mul_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
[
u32::wrapping_mul(a[0usize], b[0usize]),
u32::wrapping_mul(a[1usize], b[1usize]),
u32::wrapping_mul(a[2usize], b[2usize]),
u32::wrapping_mul(a[3usize], b[3usize]),
]
.simd_into(self)
}
#[inline(always)]
fn and_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
[
u32::bitand(a[0usize], &b[0usize]),
u32::bitand(a[1usize], &b[1usize]),
u32::bitand(a[2usize], &b[2usize]),
u32::bitand(a[3usize], &b[3usize]),
]
.simd_into(self)
}
#[inline(always)]
fn or_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
[
u32::bitor(a[0usize], &b[0usize]),
u32::bitor(a[1usize], &b[1usize]),
u32::bitor(a[2usize], &b[2usize]),
u32::bitor(a[3usize], &b[3usize]),
]
.simd_into(self)
}
#[inline(always)]
fn xor_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
[
u32::bitxor(a[0usize], &b[0usize]),
u32::bitxor(a[1usize], &b[1usize]),
u32::bitxor(a[2usize], &b[2usize]),
u32::bitxor(a[3usize], &b[3usize]),
]
.simd_into(self)
}
#[inline(always)]
fn not_u32x4(self, a: u32x4<Self>) -> u32x4<Self> {
[
u32::not(a[0usize]),
u32::not(a[1usize]),
u32::not(a[2usize]),
u32::not(a[3usize]),
]
.simd_into(self)
}
#[inline(always)]
fn shl_u32x4(self, a: u32x4<Self>, shift: u32) -> u32x4<Self> {
[
u32::shl(a[0usize], shift),
u32::shl(a[1usize], shift),
u32::shl(a[2usize], shift),
u32::shl(a[3usize], shift),
]
.simd_into(self)
}
#[inline(always)]
fn shlv_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
[
u32::shl(a[0usize], &b[0usize]),
u32::shl(a[1usize], &b[1usize]),
u32::shl(a[2usize], &b[2usize]),
u32::shl(a[3usize], &b[3usize]),
]
.simd_into(self)
}
#[inline(always)]
fn shr_u32x4(self, a: u32x4<Self>, shift: u32) -> u32x4<Self> {
[
u32::shr(a[0usize], shift),
u32::shr(a[1usize], shift),
u32::shr(a[2usize], shift),
u32::shr(a[3usize], shift),
]
.simd_into(self)
}
#[inline(always)]
fn shrv_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
[
u32::shr(a[0usize], &b[0usize]),
u32::shr(a[1usize], &b[1usize]),
u32::shr(a[2usize], &b[2usize]),
u32::shr(a[3usize], &b[3usize]),
]
.simd_into(self)
}
#[inline(always)]
fn simd_eq_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> mask32x4<Self> {
[
-(u32::eq(&a[0usize], &b[0usize]) as i32),
-(u32::eq(&a[1usize], &b[1usize]) as i32),
-(u32::eq(&a[2usize], &b[2usize]) as i32),
-(u32::eq(&a[3usize], &b[3usize]) as i32),
]
.simd_into(self)
}
#[inline(always)]
fn simd_lt_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> mask32x4<Self> {
[
-(u32::lt(&a[0usize], &b[0usize]) as i32),
-(u32::lt(&a[1usize], &b[1usize]) as i32),
-(u32::lt(&a[2usize], &b[2usize]) as i32),
-(u32::lt(&a[3usize], &b[3usize]) as i32),
]
.simd_into(self)
}
#[inline(always)]
fn simd_le_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> mask32x4<Self> {
[
-(u32::le(&a[0usize], &b[0usize]) as i32),
-(u32::le(&a[1usize], &b[1usize]) as i32),
-(u32::le(&a[2usize], &b[2usize]) as i32),
-(u32::le(&a[3usize], &b[3usize]) as i32),
]
.simd_into(self)
}
#[inline(always)]
fn simd_ge_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> mask32x4<Self> {
[
-(u32::ge(&a[0usize], &b[0usize]) as i32),
-(u32::ge(&a[1usize], &b[1usize]) as i32),
-(u32::ge(&a[2usize], &b[2usize]) as i32),
-(u32::ge(&a[3usize], &b[3usize]) as i32),
]
.simd_into(self)
}
#[inline(always)]
fn simd_gt_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> mask32x4<Self> {
[
-(u32::gt(&a[0usize], &b[0usize]) as i32),
-(u32::gt(&a[1usize], &b[1usize]) as i32),
-(u32::gt(&a[2usize], &b[2usize]) as i32),
-(u32::gt(&a[3usize], &b[3usize]) as i32),
]
.simd_into(self)
}
#[inline(always)]
fn zip_low_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
[a[0usize], b[0usize], a[1usize], b[1usize]].simd_into(self)
}
#[inline(always)]
fn zip_high_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
[a[2usize], b[2usize], a[3usize], b[3usize]].simd_into(self)
}
#[inline(always)]
fn unzip_low_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
[a[0usize], a[2usize], b[0usize], b[2usize]].simd_into(self)
}
#[inline(always)]
fn unzip_high_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
[a[1usize], a[3usize], b[1usize], b[3usize]].simd_into(self)
}
#[inline(always)]
fn select_u32x4(self, a: mask32x4<Self>, b: u32x4<Self>, c: u32x4<Self>) -> u32x4<Self> {
[
if a[0usize] != 0 { b[0usize] } else { c[0usize] },
if a[1usize] != 0 { b[1usize] } else { c[1usize] },
if a[2usize] != 0 { b[2usize] } else { c[2usize] },
if a[3usize] != 0 { b[3usize] } else { c[3usize] },
]
.simd_into(self)
}
#[inline(always)]
fn min_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
[
u32::min(a[0usize], b[0usize]),
u32::min(a[1usize], b[1usize]),
u32::min(a[2usize], b[2usize]),
u32::min(a[3usize], b[3usize]),
]
.simd_into(self)
}
#[inline(always)]
fn max_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
[
u32::max(a[0usize], b[0usize]),
u32::max(a[1usize], b[1usize]),
u32::max(a[2usize], b[2usize]),
u32::max(a[3usize], b[3usize]),
]
.simd_into(self)
}
#[inline(always)]
fn combine_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x8<Self> {
let mut result = [0; 8usize];
result[0..4usize].copy_from_slice(&a.val.0);
result[4usize..8usize].copy_from_slice(&b.val.0);
result.simd_into(self)
}
#[inline(always)]
fn reinterpret_u8_u32x4(self, a: u32x4<Self>) -> u8x16<Self> {
a.bitcast()
}
#[inline(always)]
fn cvt_f32_u32x4(self, a: u32x4<Self>) -> f32x4<Self> {
[
a[0usize] as f32,
a[1usize] as f32,
a[2usize] as f32,
a[3usize] as f32,
]
.simd_into(self)
}
#[inline(always)]
fn splat_mask32x4(self, val: i32) -> mask32x4<Self> {
[val; 4usize].simd_into(self)
}
#[inline(always)]
fn load_array_mask32x4(self, val: [i32; 4usize]) -> mask32x4<Self> {
mask32x4 {
val: crate::support::Aligned128(val),
simd: self,
}
}
#[inline(always)]
fn load_array_ref_mask32x4(self, val: &[i32; 4usize]) -> mask32x4<Self> {
mask32x4 {
val: crate::support::Aligned128(*val),
simd: self,
}
}
#[inline(always)]
fn as_array_mask32x4(self, a: mask32x4<Self>) -> [i32; 4usize] {
a.val.0
}
#[inline(always)]
fn as_array_ref_mask32x4(self, a: &mask32x4<Self>) -> &[i32; 4usize] {
&a.val.0
}
#[inline(always)]
fn as_array_mut_mask32x4(self, a: &mut mask32x4<Self>) -> &mut [i32; 4usize] {
&mut a.val.0
}
#[inline(always)]
fn store_array_mask32x4(self, a: mask32x4<Self>, dest: &mut [i32; 4usize]) -> () {
*dest = a.val.0;
}
#[inline(always)]
fn cvt_from_bytes_mask32x4(self, a: u8x16<Self>) -> mask32x4<Self> {
unsafe {
mask32x4 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_mask32x4(self, a: mask32x4<Self>) -> u8x16<Self> {
unsafe {
u8x16 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn slide_mask32x4<const SHIFT: usize>(
self,
a: mask32x4<Self>,
b: mask32x4<Self>,
) -> mask32x4<Self> {
let mut dest = [Default::default(); 4usize];
dest[..4usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
dest[4usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
dest.simd_into(self)
}
#[inline(always)]
fn slide_within_blocks_mask32x4<const SHIFT: usize>(
self,
a: mask32x4<Self>,
b: mask32x4<Self>,
) -> mask32x4<Self> {
self.slide_mask32x4::<SHIFT>(a, b)
}
#[inline(always)]
fn and_mask32x4(self, a: mask32x4<Self>, b: mask32x4<Self>) -> mask32x4<Self> {
[
i32::bitand(a[0usize], &b[0usize]),
i32::bitand(a[1usize], &b[1usize]),
i32::bitand(a[2usize], &b[2usize]),
i32::bitand(a[3usize], &b[3usize]),
]
.simd_into(self)
}
#[inline(always)]
fn or_mask32x4(self, a: mask32x4<Self>, b: mask32x4<Self>) -> mask32x4<Self> {
[
i32::bitor(a[0usize], &b[0usize]),
i32::bitor(a[1usize], &b[1usize]),
i32::bitor(a[2usize], &b[2usize]),
i32::bitor(a[3usize], &b[3usize]),
]
.simd_into(self)
}
#[inline(always)]
fn xor_mask32x4(self, a: mask32x4<Self>, b: mask32x4<Self>) -> mask32x4<Self> {
[
i32::bitxor(a[0usize], &b[0usize]),
i32::bitxor(a[1usize], &b[1usize]),
i32::bitxor(a[2usize], &b[2usize]),
i32::bitxor(a[3usize], &b[3usize]),
]
.simd_into(self)
}
#[inline(always)]
fn not_mask32x4(self, a: mask32x4<Self>) -> mask32x4<Self> {
[
i32::not(a[0usize]),
i32::not(a[1usize]),
i32::not(a[2usize]),
i32::not(a[3usize]),
]
.simd_into(self)
}
#[inline(always)]
fn select_mask32x4(
self,
a: mask32x4<Self>,
b: mask32x4<Self>,
c: mask32x4<Self>,
) -> mask32x4<Self> {
[
if a[0usize] != 0 { b[0usize] } else { c[0usize] },
if a[1usize] != 0 { b[1usize] } else { c[1usize] },
if a[2usize] != 0 { b[2usize] } else { c[2usize] },
if a[3usize] != 0 { b[3usize] } else { c[3usize] },
]
.simd_into(self)
}
#[inline(always)]
fn simd_eq_mask32x4(self, a: mask32x4<Self>, b: mask32x4<Self>) -> mask32x4<Self> {
[
-(i32::eq(&a[0usize], &b[0usize]) as i32),
-(i32::eq(&a[1usize], &b[1usize]) as i32),
-(i32::eq(&a[2usize], &b[2usize]) as i32),
-(i32::eq(&a[3usize], &b[3usize]) as i32),
]
.simd_into(self)
}
#[inline(always)]
fn any_true_mask32x4(self, a: mask32x4<Self>) -> bool {
a[0usize] != 0 || a[1usize] != 0 || a[2usize] != 0 || a[3usize] != 0
}
#[inline(always)]
fn all_true_mask32x4(self, a: mask32x4<Self>) -> bool {
a[0usize] != 0 && a[1usize] != 0 && a[2usize] != 0 && a[3usize] != 0
}
#[inline(always)]
fn any_false_mask32x4(self, a: mask32x4<Self>) -> bool {
a[0usize] == 0 || a[1usize] == 0 || a[2usize] == 0 || a[3usize] == 0
}
#[inline(always)]
fn all_false_mask32x4(self, a: mask32x4<Self>) -> bool {
a[0usize] == 0 && a[1usize] == 0 && a[2usize] == 0 && a[3usize] == 0
}
#[inline(always)]
fn combine_mask32x4(self, a: mask32x4<Self>, b: mask32x4<Self>) -> mask32x8<Self> {
let mut result = [0; 8usize];
result[0..4usize].copy_from_slice(&a.val.0);
result[4usize..8usize].copy_from_slice(&b.val.0);
result.simd_into(self)
}
#[inline(always)]
fn splat_f64x2(self, val: f64) -> f64x2<Self> {
[val; 2usize].simd_into(self)
}
#[inline(always)]
fn load_array_f64x2(self, val: [f64; 2usize]) -> f64x2<Self> {
f64x2 {
val: crate::support::Aligned128(val),
simd: self,
}
}
#[inline(always)]
fn load_array_ref_f64x2(self, val: &[f64; 2usize]) -> f64x2<Self> {
f64x2 {
val: crate::support::Aligned128(*val),
simd: self,
}
}
#[inline(always)]
fn as_array_f64x2(self, a: f64x2<Self>) -> [f64; 2usize] {
a.val.0
}
#[inline(always)]
fn as_array_ref_f64x2(self, a: &f64x2<Self>) -> &[f64; 2usize] {
&a.val.0
}
#[inline(always)]
fn as_array_mut_f64x2(self, a: &mut f64x2<Self>) -> &mut [f64; 2usize] {
&mut a.val.0
}
#[inline(always)]
fn store_array_f64x2(self, a: f64x2<Self>, dest: &mut [f64; 2usize]) -> () {
*dest = a.val.0;
}
#[inline(always)]
fn cvt_from_bytes_f64x2(self, a: u8x16<Self>) -> f64x2<Self> {
unsafe {
f64x2 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_f64x2(self, a: f64x2<Self>) -> u8x16<Self> {
unsafe {
u8x16 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn slide_f64x2<const SHIFT: usize>(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
let mut dest = [Default::default(); 2usize];
dest[..2usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
dest[2usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
dest.simd_into(self)
}
#[inline(always)]
fn slide_within_blocks_f64x2<const SHIFT: usize>(
self,
a: f64x2<Self>,
b: f64x2<Self>,
) -> f64x2<Self> {
self.slide_f64x2::<SHIFT>(a, b)
}
#[inline(always)]
fn abs_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
[f64::abs(a[0usize]), f64::abs(a[1usize])].simd_into(self)
}
#[inline(always)]
fn neg_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
[f64::neg(a[0usize]), f64::neg(a[1usize])].simd_into(self)
}
#[inline(always)]
fn sqrt_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
[f64::sqrt(a[0usize]), f64::sqrt(a[1usize])].simd_into(self)
}
#[inline(always)]
fn add_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
[
f64::add(a[0usize], &b[0usize]),
f64::add(a[1usize], &b[1usize]),
]
.simd_into(self)
}
#[inline(always)]
fn sub_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
[
f64::sub(a[0usize], &b[0usize]),
f64::sub(a[1usize], &b[1usize]),
]
.simd_into(self)
}
#[inline(always)]
fn mul_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
[
f64::mul(a[0usize], &b[0usize]),
f64::mul(a[1usize], &b[1usize]),
]
.simd_into(self)
}
#[inline(always)]
fn div_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
[
f64::div(a[0usize], &b[0usize]),
f64::div(a[1usize], &b[1usize]),
]
.simd_into(self)
}
#[inline(always)]
fn copysign_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
[
f64::copysign(a[0usize], b[0usize]),
f64::copysign(a[1usize], b[1usize]),
]
.simd_into(self)
}
#[inline(always)]
fn simd_eq_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> mask64x2<Self> {
[
-(f64::eq(&a[0usize], &b[0usize]) as i64),
-(f64::eq(&a[1usize], &b[1usize]) as i64),
]
.simd_into(self)
}
#[inline(always)]
fn simd_lt_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> mask64x2<Self> {
[
-(f64::lt(&a[0usize], &b[0usize]) as i64),
-(f64::lt(&a[1usize], &b[1usize]) as i64),
]
.simd_into(self)
}
#[inline(always)]
fn simd_le_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> mask64x2<Self> {
[
-(f64::le(&a[0usize], &b[0usize]) as i64),
-(f64::le(&a[1usize], &b[1usize]) as i64),
]
.simd_into(self)
}
#[inline(always)]
fn simd_ge_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> mask64x2<Self> {
[
-(f64::ge(&a[0usize], &b[0usize]) as i64),
-(f64::ge(&a[1usize], &b[1usize]) as i64),
]
.simd_into(self)
}
#[inline(always)]
fn simd_gt_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> mask64x2<Self> {
[
-(f64::gt(&a[0usize], &b[0usize]) as i64),
-(f64::gt(&a[1usize], &b[1usize]) as i64),
]
.simd_into(self)
}
#[inline(always)]
fn zip_low_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
[a[0usize], b[0usize]].simd_into(self)
}
#[inline(always)]
fn zip_high_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
[a[1usize], b[1usize]].simd_into(self)
}
#[inline(always)]
fn unzip_low_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
[a[0usize], b[0usize]].simd_into(self)
}
#[inline(always)]
fn unzip_high_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
[a[1usize], b[1usize]].simd_into(self)
}
#[inline(always)]
fn max_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
[
f64::max(a[0usize], b[0usize]),
f64::max(a[1usize], b[1usize]),
]
.simd_into(self)
}
#[inline(always)]
fn min_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
[
f64::min(a[0usize], b[0usize]),
f64::min(a[1usize], b[1usize]),
]
.simd_into(self)
}
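// On the fallback level the "precise" min/max coincide with the plain
// ones: `f64::min`/`f64::max` already return the non-NaN operand when
// exactly one input is NaN.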
#[inline(always)]
fn max_precise_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
[
f64::max(a[0usize], b[0usize]),
f64::max(a[1usize], b[1usize]),
]
.simd_into(self)
}
#[inline(always)]
fn min_precise_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
[
f64::min(a[0usize], b[0usize]),
f64::min(a[1usize], b[1usize]),
]
.simd_into(self)
}
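// No fused multiply-add in the fallback: `mul_add` is an ordinary
// multiply followed by an add, with two roundings. Results may differ in
// the last ulp from levels that lower to a hardware FMA.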
#[inline(always)]
fn mul_add_f64x2(self, a: f64x2<Self>, b: f64x2<Self>, c: f64x2<Self>) -> f64x2<Self> {
a.mul(b).add(c)
}
#[inline(always)]
fn mul_sub_f64x2(self, a: f64x2<Self>, b: f64x2<Self>, c: f64x2<Self>) -> f64x2<Self> {
a.mul(b).sub(c)
}
#[inline(always)]
fn floor_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
[f64::floor(a[0usize]), f64::floor(a[1usize])].simd_into(self)
}
#[inline(always)]
fn ceil_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
[f64::ceil(a[0usize]), f64::ceil(a[1usize])].simd_into(self)
}
#[inline(always)]
fn round_ties_even_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
[
f64::round_ties_even(a[0usize]),
f64::round_ties_even(a[1usize]),
]
.simd_into(self)
}
#[inline(always)]
fn fract_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
[f64::fract(a[0usize]), f64::fract(a[1usize])].simd_into(self)
}
#[inline(always)]
fn trunc_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
[f64::trunc(a[0usize]), f64::trunc(a[1usize])].simd_into(self)
}
#[inline(always)]
fn select_f64x2(self, a: mask64x2<Self>, b: f64x2<Self>, c: f64x2<Self>) -> f64x2<Self> {
[
if a[0usize] != 0 { b[0usize] } else { c[0usize] },
if a[1usize] != 0 { b[1usize] } else { c[1usize] },
]
.simd_into(self)
}
#[inline(always)]
fn combine_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x4<Self> {
let mut result = [0.0; 4usize];
result[0..2usize].copy_from_slice(&a.val.0);
result[2usize..4usize].copy_from_slice(&b.val.0);
result.simd_into(self)
}
#[inline(always)]
fn reinterpret_f32_f64x2(self, a: f64x2<Self>) -> f32x4<Self> {
a.bitcast()
}
#[inline(always)]
fn splat_mask64x2(self, val: i64) -> mask64x2<Self> {
[val; 2usize].simd_into(self)
}
#[inline(always)]
fn load_array_mask64x2(self, val: [i64; 2usize]) -> mask64x2<Self> {
mask64x2 {
val: crate::support::Aligned128(val),
simd: self,
}
}
#[inline(always)]
fn load_array_ref_mask64x2(self, val: &[i64; 2usize]) -> mask64x2<Self> {
mask64x2 {
val: crate::support::Aligned128(*val),
simd: self,
}
}
#[inline(always)]
fn as_array_mask64x2(self, a: mask64x2<Self>) -> [i64; 2usize] {
a.val.0
}
#[inline(always)]
fn as_array_ref_mask64x2(self, a: &mask64x2<Self>) -> &[i64; 2usize] {
&a.val.0
}
#[inline(always)]
fn as_array_mut_mask64x2(self, a: &mut mask64x2<Self>) -> &mut [i64; 2usize] {
&mut a.val.0
}
#[inline(always)]
fn store_array_mask64x2(self, a: mask64x2<Self>, dest: &mut [i64; 2usize]) -> () {
*dest = a.val.0;
}
#[inline(always)]
fn cvt_from_bytes_mask64x2(self, a: u8x16<Self>) -> mask64x2<Self> {
unsafe {
mask64x2 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_mask64x2(self, a: mask64x2<Self>) -> u8x16<Self> {
unsafe {
u8x16 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn slide_mask64x2<const SHIFT: usize>(
self,
a: mask64x2<Self>,
b: mask64x2<Self>,
) -> mask64x2<Self> {
let mut dest = [Default::default(); 2usize];
dest[..2usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
dest[2usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
dest.simd_into(self)
}
#[inline(always)]
fn slide_within_blocks_mask64x2<const SHIFT: usize>(
self,
a: mask64x2<Self>,
b: mask64x2<Self>,
) -> mask64x2<Self> {
self.slide_mask64x2::<SHIFT>(a, b)
}
#[inline(always)]
fn and_mask64x2(self, a: mask64x2<Self>, b: mask64x2<Self>) -> mask64x2<Self> {
[
i64::bitand(a[0usize], &b[0usize]),
i64::bitand(a[1usize], &b[1usize]),
]
.simd_into(self)
}
#[inline(always)]
fn or_mask64x2(self, a: mask64x2<Self>, b: mask64x2<Self>) -> mask64x2<Self> {
[
i64::bitor(a[0usize], &b[0usize]),
i64::bitor(a[1usize], &b[1usize]),
]
.simd_into(self)
}
#[inline(always)]
fn xor_mask64x2(self, a: mask64x2<Self>, b: mask64x2<Self>) -> mask64x2<Self> {
[
i64::bitxor(a[0usize], &b[0usize]),
i64::bitxor(a[1usize], &b[1usize]),
]
.simd_into(self)
}
#[inline(always)]
fn not_mask64x2(self, a: mask64x2<Self>) -> mask64x2<Self> {
[i64::not(a[0usize]), i64::not(a[1usize])].simd_into(self)
}
#[inline(always)]
fn select_mask64x2(
self,
a: mask64x2<Self>,
b: mask64x2<Self>,
c: mask64x2<Self>,
) -> mask64x2<Self> {
[
if a[0usize] != 0 { b[0usize] } else { c[0usize] },
if a[1usize] != 0 { b[1usize] } else { c[1usize] },
]
.simd_into(self)
}
#[inline(always)]
fn simd_eq_mask64x2(self, a: mask64x2<Self>, b: mask64x2<Self>) -> mask64x2<Self> {
[
-(i64::eq(&a[0usize], &b[0usize]) as i64),
-(i64::eq(&a[1usize], &b[1usize]) as i64),
]
.simd_into(self)
}
#[inline(always)]
fn any_true_mask64x2(self, a: mask64x2<Self>) -> bool {
a[0usize] != 0 || a[1usize] != 0
}
#[inline(always)]
fn all_true_mask64x2(self, a: mask64x2<Self>) -> bool {
a[0usize] != 0 && a[1usize] != 0
}
#[inline(always)]
fn any_false_mask64x2(self, a: mask64x2<Self>) -> bool {
a[0usize] == 0 || a[1usize] == 0
}
#[inline(always)]
fn all_false_mask64x2(self, a: mask64x2<Self>) -> bool {
a[0usize] == 0 && a[1usize] == 0
}
#[inline(always)]
fn combine_mask64x2(self, a: mask64x2<Self>, b: mask64x2<Self>) -> mask64x4<Self> {
let mut result = [0; 4usize];
result[0..2usize].copy_from_slice(&a.val.0);
result[2usize..4usize].copy_from_slice(&b.val.0);
result.simd_into(self)
}
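// 256-bit operations: the fallback stores f32x8 (and the other
// double-width types) as plain arrays and implements most operations by
// splitting into two 128-bit halves, applying the f32x4 version to each,
// and recombining.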
#[inline(always)]
fn splat_f32x8(self, val: f32) -> f32x8<Self> {
let half = self.splat_f32x4(val);
self.combine_f32x4(half, half)
}
#[inline(always)]
fn load_array_f32x8(self, val: [f32; 8usize]) -> f32x8<Self> {
f32x8 {
val: crate::support::Aligned256(val),
simd: self,
}
}
#[inline(always)]
fn load_array_ref_f32x8(self, val: &[f32; 8usize]) -> f32x8<Self> {
f32x8 {
val: crate::support::Aligned256(*val),
simd: self,
}
}
#[inline(always)]
fn as_array_f32x8(self, a: f32x8<Self>) -> [f32; 8usize] {
a.val.0
}
#[inline(always)]
fn as_array_ref_f32x8(self, a: &f32x8<Self>) -> &[f32; 8usize] {
&a.val.0
}
#[inline(always)]
fn as_array_mut_f32x8(self, a: &mut f32x8<Self>) -> &mut [f32; 8usize] {
&mut a.val.0
}
#[inline(always)]
fn store_array_f32x8(self, a: f32x8<Self>, dest: &mut [f32; 8usize]) -> () {
*dest = a.val.0;
}
#[inline(always)]
fn cvt_from_bytes_f32x8(self, a: u8x32<Self>) -> f32x8<Self> {
unsafe {
f32x8 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_f32x8(self, a: f32x8<Self>) -> u8x32<Self> {
unsafe {
u8x32 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn slide_f32x8<const SHIFT: usize>(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
let mut dest = [Default::default(); 8usize];
dest[..8usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
dest[8usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
dest.simd_into(self)
}
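// `slide_within_blocks` at 256 bits slides each 128-bit block
// independently, so lanes never cross a 128-bit block boundary.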
#[inline(always)]
fn slide_within_blocks_f32x8<const SHIFT: usize>(
self,
a: f32x8<Self>,
b: f32x8<Self>,
) -> f32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
let (b0, b1) = self.split_f32x8(b);
self.combine_f32x4(
self.slide_within_blocks_f32x4::<SHIFT>(a0, b0),
self.slide_within_blocks_f32x4::<SHIFT>(a1, b1),
)
}
#[inline(always)]
fn abs_f32x8(self, a: f32x8<Self>) -> f32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
self.combine_f32x4(self.abs_f32x4(a0), self.abs_f32x4(a1))
}
#[inline(always)]
fn neg_f32x8(self, a: f32x8<Self>) -> f32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
self.combine_f32x4(self.neg_f32x4(a0), self.neg_f32x4(a1))
}
#[inline(always)]
fn sqrt_f32x8(self, a: f32x8<Self>) -> f32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
self.combine_f32x4(self.sqrt_f32x4(a0), self.sqrt_f32x4(a1))
}
#[inline(always)]
fn add_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
let (b0, b1) = self.split_f32x8(b);
self.combine_f32x4(self.add_f32x4(a0, b0), self.add_f32x4(a1, b1))
}
#[inline(always)]
fn sub_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
let (b0, b1) = self.split_f32x8(b);
self.combine_f32x4(self.sub_f32x4(a0, b0), self.sub_f32x4(a1, b1))
}
#[inline(always)]
fn mul_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
let (b0, b1) = self.split_f32x8(b);
self.combine_f32x4(self.mul_f32x4(a0, b0), self.mul_f32x4(a1, b1))
}
#[inline(always)]
fn div_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
let (b0, b1) = self.split_f32x8(b);
self.combine_f32x4(self.div_f32x4(a0, b0), self.div_f32x4(a1, b1))
}
#[inline(always)]
fn copysign_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
let (b0, b1) = self.split_f32x8(b);
self.combine_f32x4(self.copysign_f32x4(a0, b0), self.copysign_f32x4(a1, b1))
}
#[inline(always)]
fn simd_eq_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> mask32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
let (b0, b1) = self.split_f32x8(b);
self.combine_mask32x4(self.simd_eq_f32x4(a0, b0), self.simd_eq_f32x4(a1, b1))
}
#[inline(always)]
fn simd_lt_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> mask32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
let (b0, b1) = self.split_f32x8(b);
self.combine_mask32x4(self.simd_lt_f32x4(a0, b0), self.simd_lt_f32x4(a1, b1))
}
#[inline(always)]
fn simd_le_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> mask32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
let (b0, b1) = self.split_f32x8(b);
self.combine_mask32x4(self.simd_le_f32x4(a0, b0), self.simd_le_f32x4(a1, b1))
}
#[inline(always)]
fn simd_ge_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> mask32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
let (b0, b1) = self.split_f32x8(b);
self.combine_mask32x4(self.simd_ge_f32x4(a0, b0), self.simd_ge_f32x4(a1, b1))
}
#[inline(always)]
fn simd_gt_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> mask32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
let (b0, b1) = self.split_f32x8(b);
self.combine_mask32x4(self.simd_gt_f32x4(a0, b0), self.simd_gt_f32x4(a1, b1))
}
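// zip_low interleaves the low halves of `a` and `b` into
// `[a0, b0, a1, b1, a2, b2, a3, b3]`; zip_high does the same for the high
// halves. unzip_low/unzip_high gather the even/odd lanes of the
// concatenation of `a` and `b` (assuming the usual x4 zip/unzip semantics).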
#[inline(always)]
fn zip_low_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
let (a0, _) = self.split_f32x8(a);
let (b0, _) = self.split_f32x8(b);
self.combine_f32x4(self.zip_low_f32x4(a0, b0), self.zip_high_f32x4(a0, b0))
}
#[inline(always)]
fn zip_high_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
let (_, a1) = self.split_f32x8(a);
let (_, b1) = self.split_f32x8(b);
self.combine_f32x4(self.zip_low_f32x4(a1, b1), self.zip_high_f32x4(a1, b1))
}
#[inline(always)]
fn unzip_low_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
let (b0, b1) = self.split_f32x8(b);
self.combine_f32x4(self.unzip_low_f32x4(a0, a1), self.unzip_low_f32x4(b0, b1))
}
#[inline(always)]
fn unzip_high_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
let (b0, b1) = self.split_f32x8(b);
self.combine_f32x4(self.unzip_high_f32x4(a0, a1), self.unzip_high_f32x4(b0, b1))
}
#[inline(always)]
fn max_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
let (b0, b1) = self.split_f32x8(b);
self.combine_f32x4(self.max_f32x4(a0, b0), self.max_f32x4(a1, b1))
}
#[inline(always)]
fn min_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
let (b0, b1) = self.split_f32x8(b);
self.combine_f32x4(self.min_f32x4(a0, b0), self.min_f32x4(a1, b1))
}
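// Note (an assumption based on the naming convention): the plain min/max
// may inherit platform-dependent NaN handling from the 128-bit kernels,
// while the `_precise` variants are expected to implement the stricter
// IEEE-style semantics; on this fallback level both simply delegate.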
#[inline(always)]
fn max_precise_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
let (b0, b1) = self.split_f32x8(b);
self.combine_f32x4(
self.max_precise_f32x4(a0, b0),
self.max_precise_f32x4(a1, b1),
)
}
#[inline(always)]
fn min_precise_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
let (b0, b1) = self.split_f32x8(b);
self.combine_f32x4(
self.min_precise_f32x4(a0, b0),
self.min_precise_f32x4(a1, b1),
)
}
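// mul_add computes `a * b + c` per lane (and mul_sub the analogous
// `a * b - c`, assuming the usual convention); whether the operation is
// actually fused depends on the underlying x4 implementation.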
#[inline(always)]
fn mul_add_f32x8(self, a: f32x8<Self>, b: f32x8<Self>, c: f32x8<Self>) -> f32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
let (b0, b1) = self.split_f32x8(b);
let (c0, c1) = self.split_f32x8(c);
self.combine_f32x4(
self.mul_add_f32x4(a0, b0, c0),
self.mul_add_f32x4(a1, b1, c1),
)
}
#[inline(always)]
fn mul_sub_f32x8(self, a: f32x8<Self>, b: f32x8<Self>, c: f32x8<Self>) -> f32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
let (b0, b1) = self.split_f32x8(b);
let (c0, c1) = self.split_f32x8(c);
self.combine_f32x4(
self.mul_sub_f32x4(a0, b0, c0),
self.mul_sub_f32x4(a1, b1, c1),
)
}
#[inline(always)]
fn floor_f32x8(self, a: f32x8<Self>) -> f32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
self.combine_f32x4(self.floor_f32x4(a0), self.floor_f32x4(a1))
}
#[inline(always)]
fn ceil_f32x8(self, a: f32x8<Self>) -> f32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
self.combine_f32x4(self.ceil_f32x4(a0), self.ceil_f32x4(a1))
}
#[inline(always)]
fn round_ties_even_f32x8(self, a: f32x8<Self>) -> f32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
self.combine_f32x4(
self.round_ties_even_f32x4(a0),
self.round_ties_even_f32x4(a1),
)
}
#[inline(always)]
fn fract_f32x8(self, a: f32x8<Self>) -> f32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
self.combine_f32x4(self.fract_f32x4(a0), self.fract_f32x4(a1))
}
#[inline(always)]
fn trunc_f32x8(self, a: f32x8<Self>) -> f32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
self.combine_f32x4(self.trunc_f32x4(a0), self.trunc_f32x4(a1))
}
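// `select` picks the lane from `b` where the mask is true and from `c`
// where it is false, following the 128-bit select's convention.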
#[inline(always)]
fn select_f32x8(self, a: mask32x8<Self>, b: f32x8<Self>, c: f32x8<Self>) -> f32x8<Self> {
let (a0, a1) = self.split_mask32x8(a);
let (b0, b1) = self.split_f32x8(b);
let (c0, c1) = self.split_f32x8(c);
self.combine_f32x4(self.select_f32x4(a0, b0, c0), self.select_f32x4(a1, b1, c1))
}
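// `combine` concatenates two 4-lane vectors into one 8-lane vector;
// `split` is its inverse. These two are the glue all the doubling above
// is built on.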
#[inline(always)]
fn combine_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x16<Self> {
let mut result = [0.0; 16usize];
result[0..8usize].copy_from_slice(&a.val.0);
result[8usize..16usize].copy_from_slice(&b.val.0);
result.simd_into(self)
}
#[inline(always)]
fn split_f32x8(self, a: f32x8<Self>) -> (f32x4<Self>, f32x4<Self>) {
let mut b0 = [0.0; 4usize];
let mut b1 = [0.0; 4usize];
b0.copy_from_slice(&a.val.0[0..4usize]);
b1.copy_from_slice(&a.val.0[4usize..8usize]);
(b0.simd_into(self), b1.simd_into(self))
}
#[inline(always)]
fn reinterpret_f64_f32x8(self, a: f32x8<Self>) -> f64x4<Self> {
let (a0, a1) = self.split_f32x8(a);
self.combine_f64x2(
self.reinterpret_f64_f32x4(a0),
self.reinterpret_f64_f32x4(a1),
)
}
#[inline(always)]
fn reinterpret_i32_f32x8(self, a: f32x8<Self>) -> i32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
self.combine_i32x4(
self.reinterpret_i32_f32x4(a0),
self.reinterpret_i32_f32x4(a1),
)
}
#[inline(always)]
fn reinterpret_u8_f32x8(self, a: f32x8<Self>) -> u8x32<Self> {
let (a0, a1) = self.split_f32x8(a);
self.combine_u8x16(self.reinterpret_u8_f32x4(a0), self.reinterpret_u8_f32x4(a1))
}
#[inline(always)]
fn reinterpret_u32_f32x8(self, a: f32x8<Self>) -> u32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
self.combine_u32x4(
self.reinterpret_u32_f32x4(a0),
self.reinterpret_u32_f32x4(a1),
)
}
#[inline(always)]
fn cvt_u32_f32x8(self, a: f32x8<Self>) -> u32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
self.combine_u32x4(self.cvt_u32_f32x4(a0), self.cvt_u32_f32x4(a1))
}
#[inline(always)]
fn cvt_u32_precise_f32x8(self, a: f32x8<Self>) -> u32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
self.combine_u32x4(
self.cvt_u32_precise_f32x4(a0),
self.cvt_u32_precise_f32x4(a1),
)
}
#[inline(always)]
fn cvt_i32_f32x8(self, a: f32x8<Self>) -> i32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
self.combine_i32x4(self.cvt_i32_f32x4(a0), self.cvt_i32_f32x4(a1))
}
#[inline(always)]
fn cvt_i32_precise_f32x8(self, a: f32x8<Self>) -> i32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
self.combine_i32x4(
self.cvt_i32_precise_f32x4(a0),
self.cvt_i32_precise_f32x4(a1),
)
}
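// The 256-bit integer and mask types below follow exactly the same
// split/delegate/recombine scheme as f32x8 above.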
#[inline(always)]
fn splat_i8x32(self, val: i8) -> i8x32<Self> {
let half = self.splat_i8x16(val);
self.combine_i8x16(half, half)
}
#[inline(always)]
fn load_array_i8x32(self, val: [i8; 32usize]) -> i8x32<Self> {
i8x32 {
val: crate::support::Aligned256(val),
simd: self,
}
}
#[inline(always)]
fn load_array_ref_i8x32(self, val: &[i8; 32usize]) -> i8x32<Self> {
i8x32 {
val: crate::support::Aligned256(*val),
simd: self,
}
}
#[inline(always)]
fn as_array_i8x32(self, a: i8x32<Self>) -> [i8; 32usize] {
a.val.0
}
#[inline(always)]
fn as_array_ref_i8x32(self, a: &i8x32<Self>) -> &[i8; 32usize] {
&a.val.0
}
#[inline(always)]
fn as_array_mut_i8x32(self, a: &mut i8x32<Self>) -> &mut [i8; 32usize] {
&mut a.val.0
}
#[inline(always)]
fn store_array_i8x32(self, a: i8x32<Self>, dest: &mut [i8; 32usize]) {
*dest = a.val.0;
}
#[inline(always)]
fn cvt_from_bytes_i8x32(self, a: u8x32<Self>) -> i8x32<Self> {
unsafe {
i8x32 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_i8x32(self, a: i8x32<Self>) -> u8x32<Self> {
unsafe {
u8x32 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn slide_i8x32<const SHIFT: usize>(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
let mut dest = [Default::default(); 32usize];
dest[..32usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
dest[32usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
dest.simd_into(self)
}
#[inline(always)]
fn slide_within_blocks_i8x32<const SHIFT: usize>(
self,
a: i8x32<Self>,
b: i8x32<Self>,
) -> i8x32<Self> {
let (a0, a1) = self.split_i8x32(a);
let (b0, b1) = self.split_i8x32(b);
self.combine_i8x16(
self.slide_within_blocks_i8x16::<SHIFT>(a0, b0),
self.slide_within_blocks_i8x16::<SHIFT>(a1, b1),
)
}
#[inline(always)]
fn add_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
let (a0, a1) = self.split_i8x32(a);
let (b0, b1) = self.split_i8x32(b);
self.combine_i8x16(self.add_i8x16(a0, b0), self.add_i8x16(a1, b1))
}
#[inline(always)]
fn sub_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
let (a0, a1) = self.split_i8x32(a);
let (b0, b1) = self.split_i8x32(b);
self.combine_i8x16(self.sub_i8x16(a0, b0), self.sub_i8x16(a1, b1))
}
#[inline(always)]
fn mul_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
let (a0, a1) = self.split_i8x32(a);
let (b0, b1) = self.split_i8x32(b);
self.combine_i8x16(self.mul_i8x16(a0, b0), self.mul_i8x16(a1, b1))
}
#[inline(always)]
fn and_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
let (a0, a1) = self.split_i8x32(a);
let (b0, b1) = self.split_i8x32(b);
self.combine_i8x16(self.and_i8x16(a0, b0), self.and_i8x16(a1, b1))
}
#[inline(always)]
fn or_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
let (a0, a1) = self.split_i8x32(a);
let (b0, b1) = self.split_i8x32(b);
self.combine_i8x16(self.or_i8x16(a0, b0), self.or_i8x16(a1, b1))
}
#[inline(always)]
fn xor_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
let (a0, a1) = self.split_i8x32(a);
let (b0, b1) = self.split_i8x32(b);
self.combine_i8x16(self.xor_i8x16(a0, b0), self.xor_i8x16(a1, b1))
}
#[inline(always)]
fn not_i8x32(self, a: i8x32<Self>) -> i8x32<Self> {
let (a0, a1) = self.split_i8x32(a);
self.combine_i8x16(self.not_i8x16(a0), self.not_i8x16(a1))
}
#[inline(always)]
fn shl_i8x32(self, a: i8x32<Self>, shift: u32) -> i8x32<Self> {
let (a0, a1) = self.split_i8x32(a);
self.combine_i8x16(self.shl_i8x16(a0, shift), self.shl_i8x16(a1, shift))
}
#[inline(always)]
fn shlv_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
let (a0, a1) = self.split_i8x32(a);
let (b0, b1) = self.split_i8x32(b);
self.combine_i8x16(self.shlv_i8x16(a0, b0), self.shlv_i8x16(a1, b1))
}
#[inline(always)]
fn shr_i8x32(self, a: i8x32<Self>, shift: u32) -> i8x32<Self> {
let (a0, a1) = self.split_i8x32(a);
self.combine_i8x16(self.shr_i8x16(a0, shift), self.shr_i8x16(a1, shift))
}
#[inline(always)]
fn shrv_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
let (a0, a1) = self.split_i8x32(a);
let (b0, b1) = self.split_i8x32(b);
self.combine_i8x16(self.shrv_i8x16(a0, b0), self.shrv_i8x16(a1, b1))
}
#[inline(always)]
fn simd_eq_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> mask8x32<Self> {
let (a0, a1) = self.split_i8x32(a);
let (b0, b1) = self.split_i8x32(b);
self.combine_mask8x16(self.simd_eq_i8x16(a0, b0), self.simd_eq_i8x16(a1, b1))
}
#[inline(always)]
fn simd_lt_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> mask8x32<Self> {
let (a0, a1) = self.split_i8x32(a);
let (b0, b1) = self.split_i8x32(b);
self.combine_mask8x16(self.simd_lt_i8x16(a0, b0), self.simd_lt_i8x16(a1, b1))
}
#[inline(always)]
fn simd_le_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> mask8x32<Self> {
let (a0, a1) = self.split_i8x32(a);
let (b0, b1) = self.split_i8x32(b);
self.combine_mask8x16(self.simd_le_i8x16(a0, b0), self.simd_le_i8x16(a1, b1))
}
#[inline(always)]
fn simd_ge_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> mask8x32<Self> {
let (a0, a1) = self.split_i8x32(a);
let (b0, b1) = self.split_i8x32(b);
self.combine_mask8x16(self.simd_ge_i8x16(a0, b0), self.simd_ge_i8x16(a1, b1))
}
#[inline(always)]
fn simd_gt_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> mask8x32<Self> {
let (a0, a1) = self.split_i8x32(a);
let (b0, b1) = self.split_i8x32(b);
self.combine_mask8x16(self.simd_gt_i8x16(a0, b0), self.simd_gt_i8x16(a1, b1))
}
#[inline(always)]
fn zip_low_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
let (a0, _) = self.split_i8x32(a);
let (b0, _) = self.split_i8x32(b);
self.combine_i8x16(self.zip_low_i8x16(a0, b0), self.zip_high_i8x16(a0, b0))
}
#[inline(always)]
fn zip_high_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
let (_, a1) = self.split_i8x32(a);
let (_, b1) = self.split_i8x32(b);
self.combine_i8x16(self.zip_low_i8x16(a1, b1), self.zip_high_i8x16(a1, b1))
}
#[inline(always)]
fn unzip_low_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
let (a0, a1) = self.split_i8x32(a);
let (b0, b1) = self.split_i8x32(b);
self.combine_i8x16(self.unzip_low_i8x16(a0, a1), self.unzip_low_i8x16(b0, b1))
}
#[inline(always)]
fn unzip_high_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
let (a0, a1) = self.split_i8x32(a);
let (b0, b1) = self.split_i8x32(b);
self.combine_i8x16(self.unzip_high_i8x16(a0, a1), self.unzip_high_i8x16(b0, b1))
}
#[inline(always)]
fn select_i8x32(self, a: mask8x32<Self>, b: i8x32<Self>, c: i8x32<Self>) -> i8x32<Self> {
let (a0, a1) = self.split_mask8x32(a);
let (b0, b1) = self.split_i8x32(b);
let (c0, c1) = self.split_i8x32(c);
self.combine_i8x16(self.select_i8x16(a0, b0, c0), self.select_i8x16(a1, b1, c1))
}
#[inline(always)]
fn min_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
let (a0, a1) = self.split_i8x32(a);
let (b0, b1) = self.split_i8x32(b);
self.combine_i8x16(self.min_i8x16(a0, b0), self.min_i8x16(a1, b1))
}
#[inline(always)]
fn max_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
let (a0, a1) = self.split_i8x32(a);
let (b0, b1) = self.split_i8x32(b);
self.combine_i8x16(self.max_i8x16(a0, b0), self.max_i8x16(a1, b1))
}
#[inline(always)]
fn combine_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x64<Self> {
let mut result = [0; 64usize];
result[0..32usize].copy_from_slice(&a.val.0);
result[32usize..64usize].copy_from_slice(&b.val.0);
result.simd_into(self)
}
#[inline(always)]
fn split_i8x32(self, a: i8x32<Self>) -> (i8x16<Self>, i8x16<Self>) {
let mut b0 = [0; 16usize];
let mut b1 = [0; 16usize];
b0.copy_from_slice(&a.val.0[0..16usize]);
b1.copy_from_slice(&a.val.0[16usize..32usize]);
(b0.simd_into(self), b1.simd_into(self))
}
#[inline(always)]
fn neg_i8x32(self, a: i8x32<Self>) -> i8x32<Self> {
let (a0, a1) = self.split_i8x32(a);
self.combine_i8x16(self.neg_i8x16(a0), self.neg_i8x16(a1))
}
#[inline(always)]
fn reinterpret_u8_i8x32(self, a: i8x32<Self>) -> u8x32<Self> {
let (a0, a1) = self.split_i8x32(a);
self.combine_u8x16(self.reinterpret_u8_i8x16(a0), self.reinterpret_u8_i8x16(a1))
}
#[inline(always)]
fn reinterpret_u32_i8x32(self, a: i8x32<Self>) -> u32x8<Self> {
let (a0, a1) = self.split_i8x32(a);
self.combine_u32x4(
self.reinterpret_u32_i8x16(a0),
self.reinterpret_u32_i8x16(a1),
)
}
#[inline(always)]
fn splat_u8x32(self, val: u8) -> u8x32<Self> {
let half = self.splat_u8x16(val);
self.combine_u8x16(half, half)
}
#[inline(always)]
fn load_array_u8x32(self, val: [u8; 32usize]) -> u8x32<Self> {
u8x32 {
val: crate::support::Aligned256(val),
simd: self,
}
}
#[inline(always)]
fn load_array_ref_u8x32(self, val: &[u8; 32usize]) -> u8x32<Self> {
u8x32 {
val: crate::support::Aligned256(*val),
simd: self,
}
}
#[inline(always)]
fn as_array_u8x32(self, a: u8x32<Self>) -> [u8; 32usize] {
a.val.0
}
#[inline(always)]
fn as_array_ref_u8x32(self, a: &u8x32<Self>) -> &[u8; 32usize] {
&a.val.0
}
#[inline(always)]
fn as_array_mut_u8x32(self, a: &mut u8x32<Self>) -> &mut [u8; 32usize] {
&mut a.val.0
}
#[inline(always)]
fn store_array_u8x32(self, a: u8x32<Self>, dest: &mut [u8; 32usize]) {
*dest = a.val.0;
}
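// For u8x32 the byte conversions are identity operations; the transmute
// is a no-op kept only so every lane type exposes the same pair of
// byte-conversion methods.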
#[inline(always)]
fn cvt_from_bytes_u8x32(self, a: u8x32<Self>) -> u8x32<Self> {
unsafe {
u8x32 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_u8x32(self, a: u8x32<Self>) -> u8x32<Self> {
unsafe {
u8x32 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn slide_u8x32<const SHIFT: usize>(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
let mut dest = [Default::default(); 32usize];
dest[..32usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
dest[32usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
dest.simd_into(self)
}
#[inline(always)]
fn slide_within_blocks_u8x32<const SHIFT: usize>(
self,
a: u8x32<Self>,
b: u8x32<Self>,
) -> u8x32<Self> {
let (a0, a1) = self.split_u8x32(a);
let (b0, b1) = self.split_u8x32(b);
self.combine_u8x16(
self.slide_within_blocks_u8x16::<SHIFT>(a0, b0),
self.slide_within_blocks_u8x16::<SHIFT>(a1, b1),
)
}
#[inline(always)]
fn add_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
let (a0, a1) = self.split_u8x32(a);
let (b0, b1) = self.split_u8x32(b);
self.combine_u8x16(self.add_u8x16(a0, b0), self.add_u8x16(a1, b1))
}
#[inline(always)]
fn sub_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
let (a0, a1) = self.split_u8x32(a);
let (b0, b1) = self.split_u8x32(b);
self.combine_u8x16(self.sub_u8x16(a0, b0), self.sub_u8x16(a1, b1))
}
#[inline(always)]
fn mul_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
let (a0, a1) = self.split_u8x32(a);
let (b0, b1) = self.split_u8x32(b);
self.combine_u8x16(self.mul_u8x16(a0, b0), self.mul_u8x16(a1, b1))
}
#[inline(always)]
fn and_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
let (a0, a1) = self.split_u8x32(a);
let (b0, b1) = self.split_u8x32(b);
self.combine_u8x16(self.and_u8x16(a0, b0), self.and_u8x16(a1, b1))
}
#[inline(always)]
fn or_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
let (a0, a1) = self.split_u8x32(a);
let (b0, b1) = self.split_u8x32(b);
self.combine_u8x16(self.or_u8x16(a0, b0), self.or_u8x16(a1, b1))
}
#[inline(always)]
fn xor_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
let (a0, a1) = self.split_u8x32(a);
let (b0, b1) = self.split_u8x32(b);
self.combine_u8x16(self.xor_u8x16(a0, b0), self.xor_u8x16(a1, b1))
}
#[inline(always)]
fn not_u8x32(self, a: u8x32<Self>) -> u8x32<Self> {
let (a0, a1) = self.split_u8x32(a);
self.combine_u8x16(self.not_u8x16(a0), self.not_u8x16(a1))
}
#[inline(always)]
fn shl_u8x32(self, a: u8x32<Self>, shift: u32) -> u8x32<Self> {
let (a0, a1) = self.split_u8x32(a);
self.combine_u8x16(self.shl_u8x16(a0, shift), self.shl_u8x16(a1, shift))
}
#[inline(always)]
fn shlv_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
let (a0, a1) = self.split_u8x32(a);
let (b0, b1) = self.split_u8x32(b);
self.combine_u8x16(self.shlv_u8x16(a0, b0), self.shlv_u8x16(a1, b1))
}
#[inline(always)]
fn shr_u8x32(self, a: u8x32<Self>, shift: u32) -> u8x32<Self> {
let (a0, a1) = self.split_u8x32(a);
self.combine_u8x16(self.shr_u8x16(a0, shift), self.shr_u8x16(a1, shift))
}
#[inline(always)]
fn shrv_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
let (a0, a1) = self.split_u8x32(a);
let (b0, b1) = self.split_u8x32(b);
self.combine_u8x16(self.shrv_u8x16(a0, b0), self.shrv_u8x16(a1, b1))
}
#[inline(always)]
fn simd_eq_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> mask8x32<Self> {
let (a0, a1) = self.split_u8x32(a);
let (b0, b1) = self.split_u8x32(b);
self.combine_mask8x16(self.simd_eq_u8x16(a0, b0), self.simd_eq_u8x16(a1, b1))
}
#[inline(always)]
fn simd_lt_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> mask8x32<Self> {
let (a0, a1) = self.split_u8x32(a);
let (b0, b1) = self.split_u8x32(b);
self.combine_mask8x16(self.simd_lt_u8x16(a0, b0), self.simd_lt_u8x16(a1, b1))
}
#[inline(always)]
fn simd_le_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> mask8x32<Self> {
let (a0, a1) = self.split_u8x32(a);
let (b0, b1) = self.split_u8x32(b);
self.combine_mask8x16(self.simd_le_u8x16(a0, b0), self.simd_le_u8x16(a1, b1))
}
#[inline(always)]
fn simd_ge_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> mask8x32<Self> {
let (a0, a1) = self.split_u8x32(a);
let (b0, b1) = self.split_u8x32(b);
self.combine_mask8x16(self.simd_ge_u8x16(a0, b0), self.simd_ge_u8x16(a1, b1))
}
#[inline(always)]
fn simd_gt_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> mask8x32<Self> {
let (a0, a1) = self.split_u8x32(a);
let (b0, b1) = self.split_u8x32(b);
self.combine_mask8x16(self.simd_gt_u8x16(a0, b0), self.simd_gt_u8x16(a1, b1))
}
#[inline(always)]
fn zip_low_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
let (a0, _) = self.split_u8x32(a);
let (b0, _) = self.split_u8x32(b);
self.combine_u8x16(self.zip_low_u8x16(a0, b0), self.zip_high_u8x16(a0, b0))
}
#[inline(always)]
fn zip_high_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
let (_, a1) = self.split_u8x32(a);
let (_, b1) = self.split_u8x32(b);
self.combine_u8x16(self.zip_low_u8x16(a1, b1), self.zip_high_u8x16(a1, b1))
}
#[inline(always)]
fn unzip_low_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
let (a0, a1) = self.split_u8x32(a);
let (b0, b1) = self.split_u8x32(b);
self.combine_u8x16(self.unzip_low_u8x16(a0, a1), self.unzip_low_u8x16(b0, b1))
}
#[inline(always)]
fn unzip_high_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
let (a0, a1) = self.split_u8x32(a);
let (b0, b1) = self.split_u8x32(b);
self.combine_u8x16(self.unzip_high_u8x16(a0, a1), self.unzip_high_u8x16(b0, b1))
}
#[inline(always)]
fn select_u8x32(self, a: mask8x32<Self>, b: u8x32<Self>, c: u8x32<Self>) -> u8x32<Self> {
let (a0, a1) = self.split_mask8x32(a);
let (b0, b1) = self.split_u8x32(b);
let (c0, c1) = self.split_u8x32(c);
self.combine_u8x16(self.select_u8x16(a0, b0, c0), self.select_u8x16(a1, b1, c1))
}
#[inline(always)]
fn min_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
let (a0, a1) = self.split_u8x32(a);
let (b0, b1) = self.split_u8x32(b);
self.combine_u8x16(self.min_u8x16(a0, b0), self.min_u8x16(a1, b1))
}
#[inline(always)]
fn max_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
let (a0, a1) = self.split_u8x32(a);
let (b0, b1) = self.split_u8x32(b);
self.combine_u8x16(self.max_u8x16(a0, b0), self.max_u8x16(a1, b1))
}
#[inline(always)]
fn combine_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x64<Self> {
let mut result = [0; 64usize];
result[0..32usize].copy_from_slice(&a.val.0);
result[32usize..64usize].copy_from_slice(&b.val.0);
result.simd_into(self)
}
#[inline(always)]
fn split_u8x32(self, a: u8x32<Self>) -> (u8x16<Self>, u8x16<Self>) {
let mut b0 = [0; 16usize];
let mut b1 = [0; 16usize];
b0.copy_from_slice(&a.val.0[0..16usize]);
b1.copy_from_slice(&a.val.0[16usize..32usize]);
(b0.simd_into(self), b1.simd_into(self))
}
#[inline(always)]
fn widen_u8x32(self, a: u8x32<Self>) -> u16x32<Self> {
let (a0, a1) = self.split_u8x32(a);
self.combine_u16x16(self.widen_u8x16(a0), self.widen_u8x16(a1))
}
#[inline(always)]
fn reinterpret_u32_u8x32(self, a: u8x32<Self>) -> u32x8<Self> {
let (a0, a1) = self.split_u8x32(a);
self.combine_u32x4(
self.reinterpret_u32_u8x16(a0),
self.reinterpret_u32_u8x16(a1),
)
}
#[inline(always)]
fn splat_mask8x32(self, val: i8) -> mask8x32<Self> {
let half = self.splat_mask8x16(val);
self.combine_mask8x16(half, half)
}
#[inline(always)]
fn load_array_mask8x32(self, val: [i8; 32usize]) -> mask8x32<Self> {
mask8x32 {
val: crate::support::Aligned256(val),
simd: self,
}
}
#[inline(always)]
fn load_array_ref_mask8x32(self, val: &[i8; 32usize]) -> mask8x32<Self> {
mask8x32 {
val: crate::support::Aligned256(*val),
simd: self,
}
}
#[inline(always)]
fn as_array_mask8x32(self, a: mask8x32<Self>) -> [i8; 32usize] {
a.val.0
}
#[inline(always)]
fn as_array_ref_mask8x32(self, a: &mask8x32<Self>) -> &[i8; 32usize] {
&a.val.0
}
#[inline(always)]
fn as_array_mut_mask8x32(self, a: &mut mask8x32<Self>) -> &mut [i8; 32usize] {
&mut a.val.0
}
#[inline(always)]
fn store_array_mask8x32(self, a: mask8x32<Self>, dest: &mut [i8; 32usize]) {
*dest = a.val.0;
}
#[inline(always)]
fn cvt_from_bytes_mask8x32(self, a: u8x32<Self>) -> mask8x32<Self> {
unsafe {
mask8x32 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_mask8x32(self, a: mask8x32<Self>) -> u8x32<Self> {
unsafe {
u8x32 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn slide_mask8x32<const SHIFT: usize>(
self,
a: mask8x32<Self>,
b: mask8x32<Self>,
) -> mask8x32<Self> {
let mut dest = [Default::default(); 32usize];
dest[..32usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
dest[32usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
dest.simd_into(self)
}
#[inline(always)]
fn slide_within_blocks_mask8x32<const SHIFT: usize>(
self,
a: mask8x32<Self>,
b: mask8x32<Self>,
) -> mask8x32<Self> {
let (a0, a1) = self.split_mask8x32(a);
let (b0, b1) = self.split_mask8x32(b);
self.combine_mask8x16(
self.slide_within_blocks_mask8x16::<SHIFT>(a0, b0),
self.slide_within_blocks_mask8x16::<SHIFT>(a1, b1),
)
}
#[inline(always)]
fn and_mask8x32(self, a: mask8x32<Self>, b: mask8x32<Self>) -> mask8x32<Self> {
let (a0, a1) = self.split_mask8x32(a);
let (b0, b1) = self.split_mask8x32(b);
self.combine_mask8x16(self.and_mask8x16(a0, b0), self.and_mask8x16(a1, b1))
}
#[inline(always)]
fn or_mask8x32(self, a: mask8x32<Self>, b: mask8x32<Self>) -> mask8x32<Self> {
let (a0, a1) = self.split_mask8x32(a);
let (b0, b1) = self.split_mask8x32(b);
self.combine_mask8x16(self.or_mask8x16(a0, b0), self.or_mask8x16(a1, b1))
}
#[inline(always)]
fn xor_mask8x32(self, a: mask8x32<Self>, b: mask8x32<Self>) -> mask8x32<Self> {
let (a0, a1) = self.split_mask8x32(a);
let (b0, b1) = self.split_mask8x32(b);
self.combine_mask8x16(self.xor_mask8x16(a0, b0), self.xor_mask8x16(a1, b1))
}
#[inline(always)]
fn not_mask8x32(self, a: mask8x32<Self>) -> mask8x32<Self> {
let (a0, a1) = self.split_mask8x32(a);
self.combine_mask8x16(self.not_mask8x16(a0), self.not_mask8x16(a1))
}
#[inline(always)]
fn select_mask8x32(
self,
a: mask8x32<Self>,
b: mask8x32<Self>,
c: mask8x32<Self>,
) -> mask8x32<Self> {
let (a0, a1) = self.split_mask8x32(a);
let (b0, b1) = self.split_mask8x32(b);
let (c0, c1) = self.split_mask8x32(c);
self.combine_mask8x16(
self.select_mask8x16(a0, b0, c0),
self.select_mask8x16(a1, b1, c1),
)
}
#[inline(always)]
fn simd_eq_mask8x32(self, a: mask8x32<Self>, b: mask8x32<Self>) -> mask8x32<Self> {
let (a0, a1) = self.split_mask8x32(a);
let (b0, b1) = self.split_mask8x32(b);
self.combine_mask8x16(self.simd_eq_mask8x16(a0, b0), self.simd_eq_mask8x16(a1, b1))
}
#[inline(always)]
fn any_true_mask8x32(self, a: mask8x32<Self>) -> bool {
let (a0, a1) = self.split_mask8x32(a);
self.any_true_mask8x16(a0) || self.any_true_mask8x16(a1)
}
#[inline(always)]
fn all_true_mask8x32(self, a: mask8x32<Self>) -> bool {
let (a0, a1) = self.split_mask8x32(a);
self.all_true_mask8x16(a0) && self.all_true_mask8x16(a1)
}
#[inline(always)]
fn any_false_mask8x32(self, a: mask8x32<Self>) -> bool {
let (a0, a1) = self.split_mask8x32(a);
self.any_false_mask8x16(a0) || self.any_false_mask8x16(a1)
}
#[inline(always)]
fn all_false_mask8x32(self, a: mask8x32<Self>) -> bool {
let (a0, a1) = self.split_mask8x32(a);
self.all_false_mask8x16(a0) && self.all_false_mask8x16(a1)
}
#[inline(always)]
fn combine_mask8x32(self, a: mask8x32<Self>, b: mask8x32<Self>) -> mask8x64<Self> {
let mut result = [0; 64usize];
result[0..32usize].copy_from_slice(&a.val.0);
result[32usize..64usize].copy_from_slice(&b.val.0);
result.simd_into(self)
}
#[inline(always)]
fn split_mask8x32(self, a: mask8x32<Self>) -> (mask8x16<Self>, mask8x16<Self>) {
let mut b0 = [0; 16usize];
let mut b1 = [0; 16usize];
b0.copy_from_slice(&a.val.0[0..16usize]);
b1.copy_from_slice(&a.val.0[16usize..32usize]);
(b0.simd_into(self), b1.simd_into(self))
}
#[inline(always)]
fn splat_i16x16(self, val: i16) -> i16x16<Self> {
let half = self.splat_i16x8(val);
self.combine_i16x8(half, half)
}
#[inline(always)]
fn load_array_i16x16(self, val: [i16; 16usize]) -> i16x16<Self> {
i16x16 {
val: crate::support::Aligned256(val),
simd: self,
}
}
#[inline(always)]
fn load_array_ref_i16x16(self, val: &[i16; 16usize]) -> i16x16<Self> {
i16x16 {
val: crate::support::Aligned256(*val),
simd: self,
}
}
#[inline(always)]
fn as_array_i16x16(self, a: i16x16<Self>) -> [i16; 16usize] {
a.val.0
}
#[inline(always)]
fn as_array_ref_i16x16(self, a: &i16x16<Self>) -> &[i16; 16usize] {
&a.val.0
}
#[inline(always)]
fn as_array_mut_i16x16(self, a: &mut i16x16<Self>) -> &mut [i16; 16usize] {
&mut a.val.0
}
#[inline(always)]
fn store_array_i16x16(self, a: i16x16<Self>, dest: &mut [i16; 16usize]) {
*dest = a.val.0;
}
#[inline(always)]
fn cvt_from_bytes_i16x16(self, a: u8x32<Self>) -> i16x16<Self> {
unsafe {
i16x16 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_i16x16(self, a: i16x16<Self>) -> u8x32<Self> {
unsafe {
u8x32 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn slide_i16x16<const SHIFT: usize>(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
let mut dest = [Default::default(); 16usize];
dest[..16usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
dest[16usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
dest.simd_into(self)
}
#[inline(always)]
fn slide_within_blocks_i16x16<const SHIFT: usize>(
self,
a: i16x16<Self>,
b: i16x16<Self>,
) -> i16x16<Self> {
let (a0, a1) = self.split_i16x16(a);
let (b0, b1) = self.split_i16x16(b);
self.combine_i16x8(
self.slide_within_blocks_i16x8::<SHIFT>(a0, b0),
self.slide_within_blocks_i16x8::<SHIFT>(a1, b1),
)
}
#[inline(always)]
fn add_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
let (a0, a1) = self.split_i16x16(a);
let (b0, b1) = self.split_i16x16(b);
self.combine_i16x8(self.add_i16x8(a0, b0), self.add_i16x8(a1, b1))
}
#[inline(always)]
fn sub_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
let (a0, a1) = self.split_i16x16(a);
let (b0, b1) = self.split_i16x16(b);
self.combine_i16x8(self.sub_i16x8(a0, b0), self.sub_i16x8(a1, b1))
}
#[inline(always)]
fn mul_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
let (a0, a1) = self.split_i16x16(a);
let (b0, b1) = self.split_i16x16(b);
self.combine_i16x8(self.mul_i16x8(a0, b0), self.mul_i16x8(a1, b1))
}
#[inline(always)]
fn and_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
let (a0, a1) = self.split_i16x16(a);
let (b0, b1) = self.split_i16x16(b);
self.combine_i16x8(self.and_i16x8(a0, b0), self.and_i16x8(a1, b1))
}
#[inline(always)]
fn or_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
let (a0, a1) = self.split_i16x16(a);
let (b0, b1) = self.split_i16x16(b);
self.combine_i16x8(self.or_i16x8(a0, b0), self.or_i16x8(a1, b1))
}
#[inline(always)]
fn xor_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
let (a0, a1) = self.split_i16x16(a);
let (b0, b1) = self.split_i16x16(b);
self.combine_i16x8(self.xor_i16x8(a0, b0), self.xor_i16x8(a1, b1))
}
#[inline(always)]
fn not_i16x16(self, a: i16x16<Self>) -> i16x16<Self> {
let (a0, a1) = self.split_i16x16(a);
self.combine_i16x8(self.not_i16x8(a0), self.not_i16x8(a1))
}
#[inline(always)]
fn shl_i16x16(self, a: i16x16<Self>, shift: u32) -> i16x16<Self> {
let (a0, a1) = self.split_i16x16(a);
self.combine_i16x8(self.shl_i16x8(a0, shift), self.shl_i16x8(a1, shift))
}
#[inline(always)]
fn shlv_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
let (a0, a1) = self.split_i16x16(a);
let (b0, b1) = self.split_i16x16(b);
self.combine_i16x8(self.shlv_i16x8(a0, b0), self.shlv_i16x8(a1, b1))
}
#[inline(always)]
fn shr_i16x16(self, a: i16x16<Self>, shift: u32) -> i16x16<Self> {
let (a0, a1) = self.split_i16x16(a);
self.combine_i16x8(self.shr_i16x8(a0, shift), self.shr_i16x8(a1, shift))
}
#[inline(always)]
fn shrv_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
let (a0, a1) = self.split_i16x16(a);
let (b0, b1) = self.split_i16x16(b);
self.combine_i16x8(self.shrv_i16x8(a0, b0), self.shrv_i16x8(a1, b1))
}
#[inline(always)]
fn simd_eq_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> mask16x16<Self> {
let (a0, a1) = self.split_i16x16(a);
let (b0, b1) = self.split_i16x16(b);
self.combine_mask16x8(self.simd_eq_i16x8(a0, b0), self.simd_eq_i16x8(a1, b1))
}
#[inline(always)]
fn simd_lt_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> mask16x16<Self> {
let (a0, a1) = self.split_i16x16(a);
let (b0, b1) = self.split_i16x16(b);
self.combine_mask16x8(self.simd_lt_i16x8(a0, b0), self.simd_lt_i16x8(a1, b1))
}
#[inline(always)]
fn simd_le_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> mask16x16<Self> {
let (a0, a1) = self.split_i16x16(a);
let (b0, b1) = self.split_i16x16(b);
self.combine_mask16x8(self.simd_le_i16x8(a0, b0), self.simd_le_i16x8(a1, b1))
}
#[inline(always)]
fn simd_ge_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> mask16x16<Self> {
let (a0, a1) = self.split_i16x16(a);
let (b0, b1) = self.split_i16x16(b);
self.combine_mask16x8(self.simd_ge_i16x8(a0, b0), self.simd_ge_i16x8(a1, b1))
}
#[inline(always)]
fn simd_gt_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> mask16x16<Self> {
let (a0, a1) = self.split_i16x16(a);
let (b0, b1) = self.split_i16x16(b);
self.combine_mask16x8(self.simd_gt_i16x8(a0, b0), self.simd_gt_i16x8(a1, b1))
}
#[inline(always)]
fn zip_low_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
let (a0, _) = self.split_i16x16(a);
let (b0, _) = self.split_i16x16(b);
self.combine_i16x8(self.zip_low_i16x8(a0, b0), self.zip_high_i16x8(a0, b0))
}
#[inline(always)]
fn zip_high_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
let (_, a1) = self.split_i16x16(a);
let (_, b1) = self.split_i16x16(b);
self.combine_i16x8(self.zip_low_i16x8(a1, b1), self.zip_high_i16x8(a1, b1))
}
#[inline(always)]
fn unzip_low_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
let (a0, a1) = self.split_i16x16(a);
let (b0, b1) = self.split_i16x16(b);
self.combine_i16x8(self.unzip_low_i16x8(a0, a1), self.unzip_low_i16x8(b0, b1))
}
#[inline(always)]
fn unzip_high_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
let (a0, a1) = self.split_i16x16(a);
let (b0, b1) = self.split_i16x16(b);
self.combine_i16x8(self.unzip_high_i16x8(a0, a1), self.unzip_high_i16x8(b0, b1))
}
#[inline(always)]
fn select_i16x16(self, a: mask16x16<Self>, b: i16x16<Self>, c: i16x16<Self>) -> i16x16<Self> {
let (a0, a1) = self.split_mask16x16(a);
let (b0, b1) = self.split_i16x16(b);
let (c0, c1) = self.split_i16x16(c);
self.combine_i16x8(self.select_i16x8(a0, b0, c0), self.select_i16x8(a1, b1, c1))
}
#[inline(always)]
fn min_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
let (a0, a1) = self.split_i16x16(a);
let (b0, b1) = self.split_i16x16(b);
self.combine_i16x8(self.min_i16x8(a0, b0), self.min_i16x8(a1, b1))
}
#[inline(always)]
fn max_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
let (a0, a1) = self.split_i16x16(a);
let (b0, b1) = self.split_i16x16(b);
self.combine_i16x8(self.max_i16x8(a0, b0), self.max_i16x8(a1, b1))
}
#[inline(always)]
fn combine_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x32<Self> {
let mut result = [0; 32usize];
result[0..16usize].copy_from_slice(&a.val.0);
result[16usize..32usize].copy_from_slice(&b.val.0);
result.simd_into(self)
}
#[inline(always)]
fn split_i16x16(self, a: i16x16<Self>) -> (i16x8<Self>, i16x8<Self>) {
let mut b0 = [0; 8usize];
let mut b1 = [0; 8usize];
b0.copy_from_slice(&a.val.0[0..8usize]);
b1.copy_from_slice(&a.val.0[8usize..16usize]);
(b0.simd_into(self), b1.simd_into(self))
}
#[inline(always)]
fn neg_i16x16(self, a: i16x16<Self>) -> i16x16<Self> {
let (a0, a1) = self.split_i16x16(a);
self.combine_i16x8(self.neg_i16x8(a0), self.neg_i16x8(a1))
}
#[inline(always)]
fn reinterpret_u8_i16x16(self, a: i16x16<Self>) -> u8x32<Self> {
let (a0, a1) = self.split_i16x16(a);
self.combine_u8x16(self.reinterpret_u8_i16x8(a0), self.reinterpret_u8_i16x8(a1))
}
#[inline(always)]
fn reinterpret_u32_i16x16(self, a: i16x16<Self>) -> u32x8<Self> {
let (a0, a1) = self.split_i16x16(a);
self.combine_u32x4(
self.reinterpret_u32_i16x8(a0),
self.reinterpret_u32_i16x8(a1),
)
}
#[inline(always)]
fn splat_u16x16(self, val: u16) -> u16x16<Self> {
let half = self.splat_u16x8(val);
self.combine_u16x8(half, half)
}
#[inline(always)]
fn load_array_u16x16(self, val: [u16; 16usize]) -> u16x16<Self> {
u16x16 {
val: crate::support::Aligned256(val),
simd: self,
}
}
#[inline(always)]
fn load_array_ref_u16x16(self, val: &[u16; 16usize]) -> u16x16<Self> {
u16x16 {
val: crate::support::Aligned256(*val),
simd: self,
}
}
#[inline(always)]
fn as_array_u16x16(self, a: u16x16<Self>) -> [u16; 16usize] {
a.val.0
}
#[inline(always)]
fn as_array_ref_u16x16(self, a: &u16x16<Self>) -> &[u16; 16usize] {
&a.val.0
}
#[inline(always)]
fn as_array_mut_u16x16(self, a: &mut u16x16<Self>) -> &mut [u16; 16usize] {
&mut a.val.0
}
#[inline(always)]
fn store_array_u16x16(self, a: u16x16<Self>, dest: &mut [u16; 16usize]) {
*dest = a.val.0;
}
#[inline(always)]
fn cvt_from_bytes_u16x16(self, a: u8x32<Self>) -> u16x16<Self> {
unsafe {
u16x16 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_u16x16(self, a: u16x16<Self>) -> u8x32<Self> {
unsafe {
u8x32 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn slide_u16x16<const SHIFT: usize>(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
let mut dest = [Default::default(); 16usize];
dest[..16usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
dest[16usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
dest.simd_into(self)
}
#[inline(always)]
fn slide_within_blocks_u16x16<const SHIFT: usize>(
self,
a: u16x16<Self>,
b: u16x16<Self>,
) -> u16x16<Self> {
let (a0, a1) = self.split_u16x16(a);
let (b0, b1) = self.split_u16x16(b);
self.combine_u16x8(
self.slide_within_blocks_u16x8::<SHIFT>(a0, b0),
self.slide_within_blocks_u16x8::<SHIFT>(a1, b1),
)
}
#[inline(always)]
fn add_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
let (a0, a1) = self.split_u16x16(a);
let (b0, b1) = self.split_u16x16(b);
self.combine_u16x8(self.add_u16x8(a0, b0), self.add_u16x8(a1, b1))
}
#[inline(always)]
fn sub_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
let (a0, a1) = self.split_u16x16(a);
let (b0, b1) = self.split_u16x16(b);
self.combine_u16x8(self.sub_u16x8(a0, b0), self.sub_u16x8(a1, b1))
}
#[inline(always)]
fn mul_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
let (a0, a1) = self.split_u16x16(a);
let (b0, b1) = self.split_u16x16(b);
self.combine_u16x8(self.mul_u16x8(a0, b0), self.mul_u16x8(a1, b1))
}
#[inline(always)]
fn and_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
let (a0, a1) = self.split_u16x16(a);
let (b0, b1) = self.split_u16x16(b);
self.combine_u16x8(self.and_u16x8(a0, b0), self.and_u16x8(a1, b1))
}
#[inline(always)]
fn or_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
let (a0, a1) = self.split_u16x16(a);
let (b0, b1) = self.split_u16x16(b);
self.combine_u16x8(self.or_u16x8(a0, b0), self.or_u16x8(a1, b1))
}
#[inline(always)]
fn xor_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
let (a0, a1) = self.split_u16x16(a);
let (b0, b1) = self.split_u16x16(b);
self.combine_u16x8(self.xor_u16x8(a0, b0), self.xor_u16x8(a1, b1))
}
#[inline(always)]
fn not_u16x16(self, a: u16x16<Self>) -> u16x16<Self> {
let (a0, a1) = self.split_u16x16(a);
self.combine_u16x8(self.not_u16x8(a0), self.not_u16x8(a1))
}
#[inline(always)]
fn shl_u16x16(self, a: u16x16<Self>, shift: u32) -> u16x16<Self> {
let (a0, a1) = self.split_u16x16(a);
self.combine_u16x8(self.shl_u16x8(a0, shift), self.shl_u16x8(a1, shift))
}
#[inline(always)]
fn shlv_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
let (a0, a1) = self.split_u16x16(a);
let (b0, b1) = self.split_u16x16(b);
self.combine_u16x8(self.shlv_u16x8(a0, b0), self.shlv_u16x8(a1, b1))
}
#[inline(always)]
fn shr_u16x16(self, a: u16x16<Self>, shift: u32) -> u16x16<Self> {
let (a0, a1) = self.split_u16x16(a);
self.combine_u16x8(self.shr_u16x8(a0, shift), self.shr_u16x8(a1, shift))
}
#[inline(always)]
fn shrv_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
let (a0, a1) = self.split_u16x16(a);
let (b0, b1) = self.split_u16x16(b);
self.combine_u16x8(self.shrv_u16x8(a0, b0), self.shrv_u16x8(a1, b1))
}
#[inline(always)]
fn simd_eq_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> mask16x16<Self> {
let (a0, a1) = self.split_u16x16(a);
let (b0, b1) = self.split_u16x16(b);
self.combine_mask16x8(self.simd_eq_u16x8(a0, b0), self.simd_eq_u16x8(a1, b1))
}
#[inline(always)]
fn simd_lt_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> mask16x16<Self> {
let (a0, a1) = self.split_u16x16(a);
let (b0, b1) = self.split_u16x16(b);
self.combine_mask16x8(self.simd_lt_u16x8(a0, b0), self.simd_lt_u16x8(a1, b1))
}
#[inline(always)]
fn simd_le_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> mask16x16<Self> {
let (a0, a1) = self.split_u16x16(a);
let (b0, b1) = self.split_u16x16(b);
self.combine_mask16x8(self.simd_le_u16x8(a0, b0), self.simd_le_u16x8(a1, b1))
}
#[inline(always)]
fn simd_ge_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> mask16x16<Self> {
let (a0, a1) = self.split_u16x16(a);
let (b0, b1) = self.split_u16x16(b);
self.combine_mask16x8(self.simd_ge_u16x8(a0, b0), self.simd_ge_u16x8(a1, b1))
}
#[inline(always)]
fn simd_gt_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> mask16x16<Self> {
let (a0, a1) = self.split_u16x16(a);
let (b0, b1) = self.split_u16x16(b);
self.combine_mask16x8(self.simd_gt_u16x8(a0, b0), self.simd_gt_u16x8(a1, b1))
}
#[inline(always)]
fn zip_low_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
let (a0, _) = self.split_u16x16(a);
let (b0, _) = self.split_u16x16(b);
self.combine_u16x8(self.zip_low_u16x8(a0, b0), self.zip_high_u16x8(a0, b0))
}
#[inline(always)]
fn zip_high_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
let (_, a1) = self.split_u16x16(a);
let (_, b1) = self.split_u16x16(b);
self.combine_u16x8(self.zip_low_u16x8(a1, b1), self.zip_high_u16x8(a1, b1))
}
#[inline(always)]
fn unzip_low_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
let (a0, a1) = self.split_u16x16(a);
let (b0, b1) = self.split_u16x16(b);
self.combine_u16x8(self.unzip_low_u16x8(a0, a1), self.unzip_low_u16x8(b0, b1))
}
#[inline(always)]
fn unzip_high_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
let (a0, a1) = self.split_u16x16(a);
let (b0, b1) = self.split_u16x16(b);
self.combine_u16x8(self.unzip_high_u16x8(a0, a1), self.unzip_high_u16x8(b0, b1))
}
#[inline(always)]
fn select_u16x16(self, a: mask16x16<Self>, b: u16x16<Self>, c: u16x16<Self>) -> u16x16<Self> {
let (a0, a1) = self.split_mask16x16(a);
let (b0, b1) = self.split_u16x16(b);
let (c0, c1) = self.split_u16x16(c);
self.combine_u16x8(self.select_u16x8(a0, b0, c0), self.select_u16x8(a1, b1, c1))
}
#[inline(always)]
fn min_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
let (a0, a1) = self.split_u16x16(a);
let (b0, b1) = self.split_u16x16(b);
self.combine_u16x8(self.min_u16x8(a0, b0), self.min_u16x8(a1, b1))
}
#[inline(always)]
fn max_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
let (a0, a1) = self.split_u16x16(a);
let (b0, b1) = self.split_u16x16(b);
self.combine_u16x8(self.max_u16x8(a0, b0), self.max_u16x8(a1, b1))
}
#[inline(always)]
fn combine_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x32<Self> {
let mut result = [0; 32usize];
result[0..16usize].copy_from_slice(&a.val.0);
result[16usize..32usize].copy_from_slice(&b.val.0);
result.simd_into(self)
}
#[inline(always)]
fn split_u16x16(self, a: u16x16<Self>) -> (u16x8<Self>, u16x8<Self>) {
let mut b0 = [0; 8usize];
let mut b1 = [0; 8usize];
b0.copy_from_slice(&a.val.0[0..8usize]);
b1.copy_from_slice(&a.val.0[8usize..16usize]);
(b0.simd_into(self), b1.simd_into(self))
}
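// Narrowing truncates each 16-bit lane to its low byte (`as u8` wraps
// rather than saturates).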
#[inline(always)]
fn narrow_u16x16(self, a: u16x16<Self>) -> u8x16<Self> {
[
a[0usize] as u8,
a[1usize] as u8,
a[2usize] as u8,
a[3usize] as u8,
a[4usize] as u8,
a[5usize] as u8,
a[6usize] as u8,
a[7usize] as u8,
a[8usize] as u8,
a[9usize] as u8,
a[10usize] as u8,
a[11usize] as u8,
a[12usize] as u8,
a[13usize] as u8,
a[14usize] as u8,
a[15usize] as u8,
]
.simd_into(self)
}
#[inline(always)]
fn reinterpret_u8_u16x16(self, a: u16x16<Self>) -> u8x32<Self> {
let (a0, a1) = self.split_u16x16(a);
self.combine_u8x16(self.reinterpret_u8_u16x8(a0), self.reinterpret_u8_u16x8(a1))
}
#[inline(always)]
fn reinterpret_u32_u16x16(self, a: u16x16<Self>) -> u32x8<Self> {
let (a0, a1) = self.split_u16x16(a);
self.combine_u32x4(
self.reinterpret_u32_u16x8(a0),
self.reinterpret_u32_u16x8(a1),
)
}
#[inline(always)]
fn splat_mask16x16(self, val: i16) -> mask16x16<Self> {
let half = self.splat_mask16x8(val);
self.combine_mask16x8(half, half)
}
#[inline(always)]
fn load_array_mask16x16(self, val: [i16; 16usize]) -> mask16x16<Self> {
mask16x16 {
val: crate::support::Aligned256(val),
simd: self,
}
}
#[inline(always)]
fn load_array_ref_mask16x16(self, val: &[i16; 16usize]) -> mask16x16<Self> {
mask16x16 {
val: crate::support::Aligned256(*val),
simd: self,
}
}
#[inline(always)]
fn as_array_mask16x16(self, a: mask16x16<Self>) -> [i16; 16usize] {
a.val.0
}
#[inline(always)]
fn as_array_ref_mask16x16(self, a: &mask16x16<Self>) -> &[i16; 16usize] {
&a.val.0
}
#[inline(always)]
fn as_array_mut_mask16x16(self, a: &mut mask16x16<Self>) -> &mut [i16; 16usize] {
&mut a.val.0
}
#[inline(always)]
fn store_array_mask16x16(self, a: mask16x16<Self>, dest: &mut [i16; 16usize]) {
*dest = a.val.0;
}
#[inline(always)]
fn cvt_from_bytes_mask16x16(self, a: u8x32<Self>) -> mask16x16<Self> {
unsafe {
mask16x16 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_mask16x16(self, a: mask16x16<Self>) -> u8x32<Self> {
unsafe {
u8x32 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn slide_mask16x16<const SHIFT: usize>(
self,
a: mask16x16<Self>,
b: mask16x16<Self>,
) -> mask16x16<Self> {
let mut dest = [Default::default(); 16usize];
dest[..16usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
dest[16usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
dest.simd_into(self)
}
#[inline(always)]
fn slide_within_blocks_mask16x16<const SHIFT: usize>(
self,
a: mask16x16<Self>,
b: mask16x16<Self>,
) -> mask16x16<Self> {
let (a0, a1) = self.split_mask16x16(a);
let (b0, b1) = self.split_mask16x16(b);
self.combine_mask16x8(
self.slide_within_blocks_mask16x8::<SHIFT>(a0, b0),
self.slide_within_blocks_mask16x8::<SHIFT>(a1, b1),
)
}
#[inline(always)]
fn and_mask16x16(self, a: mask16x16<Self>, b: mask16x16<Self>) -> mask16x16<Self> {
let (a0, a1) = self.split_mask16x16(a);
let (b0, b1) = self.split_mask16x16(b);
self.combine_mask16x8(self.and_mask16x8(a0, b0), self.and_mask16x8(a1, b1))
}
#[inline(always)]
fn or_mask16x16(self, a: mask16x16<Self>, b: mask16x16<Self>) -> mask16x16<Self> {
let (a0, a1) = self.split_mask16x16(a);
let (b0, b1) = self.split_mask16x16(b);
self.combine_mask16x8(self.or_mask16x8(a0, b0), self.or_mask16x8(a1, b1))
}
#[inline(always)]
fn xor_mask16x16(self, a: mask16x16<Self>, b: mask16x16<Self>) -> mask16x16<Self> {
let (a0, a1) = self.split_mask16x16(a);
let (b0, b1) = self.split_mask16x16(b);
self.combine_mask16x8(self.xor_mask16x8(a0, b0), self.xor_mask16x8(a1, b1))
}
#[inline(always)]
fn not_mask16x16(self, a: mask16x16<Self>) -> mask16x16<Self> {
let (a0, a1) = self.split_mask16x16(a);
self.combine_mask16x8(self.not_mask16x8(a0), self.not_mask16x8(a1))
}
#[inline(always)]
fn select_mask16x16(
self,
a: mask16x16<Self>,
b: mask16x16<Self>,
c: mask16x16<Self>,
) -> mask16x16<Self> {
let (a0, a1) = self.split_mask16x16(a);
let (b0, b1) = self.split_mask16x16(b);
let (c0, c1) = self.split_mask16x16(c);
self.combine_mask16x8(
self.select_mask16x8(a0, b0, c0),
self.select_mask16x8(a1, b1, c1),
)
}
#[inline(always)]
fn simd_eq_mask16x16(self, a: mask16x16<Self>, b: mask16x16<Self>) -> mask16x16<Self> {
let (a0, a1) = self.split_mask16x16(a);
let (b0, b1) = self.split_mask16x16(b);
self.combine_mask16x8(self.simd_eq_mask16x8(a0, b0), self.simd_eq_mask16x8(a1, b1))
}
#[inline(always)]
fn any_true_mask16x16(self, a: mask16x16<Self>) -> bool {
let (a0, a1) = self.split_mask16x16(a);
self.any_true_mask16x8(a0) || self.any_true_mask16x8(a1)
}
#[inline(always)]
fn all_true_mask16x16(self, a: mask16x16<Self>) -> bool {
let (a0, a1) = self.split_mask16x16(a);
self.all_true_mask16x8(a0) && self.all_true_mask16x8(a1)
}
#[inline(always)]
fn any_false_mask16x16(self, a: mask16x16<Self>) -> bool {
let (a0, a1) = self.split_mask16x16(a);
self.any_false_mask16x8(a0) || self.any_false_mask16x8(a1)
}
#[inline(always)]
fn all_false_mask16x16(self, a: mask16x16<Self>) -> bool {
let (a0, a1) = self.split_mask16x16(a);
self.all_false_mask16x8(a0) && self.all_false_mask16x8(a1)
}
#[inline(always)]
fn combine_mask16x16(self, a: mask16x16<Self>, b: mask16x16<Self>) -> mask16x32<Self> {
let mut result = [0; 32usize];
result[0..16usize].copy_from_slice(&a.val.0);
result[16usize..32usize].copy_from_slice(&b.val.0);
result.simd_into(self)
}
#[inline(always)]
fn split_mask16x16(self, a: mask16x16<Self>) -> (mask16x8<Self>, mask16x8<Self>) {
let mut b0 = [0; 8usize];
let mut b1 = [0; 8usize];
b0.copy_from_slice(&a.val.0[0..8usize]);
b1.copy_from_slice(&a.val.0[8usize..16usize]);
(b0.simd_into(self), b1.simd_into(self))
}
#[inline(always)]
fn splat_i32x8(self, val: i32) -> i32x8<Self> {
let half = self.splat_i32x4(val);
self.combine_i32x4(half, half)
}
#[inline(always)]
fn load_array_i32x8(self, val: [i32; 8usize]) -> i32x8<Self> {
i32x8 {
val: crate::support::Aligned256(val),
simd: self,
}
}
#[inline(always)]
fn load_array_ref_i32x8(self, val: &[i32; 8usize]) -> i32x8<Self> {
i32x8 {
val: crate::support::Aligned256(*val),
simd: self,
}
}
#[inline(always)]
fn as_array_i32x8(self, a: i32x8<Self>) -> [i32; 8usize] {
a.val.0
}
#[inline(always)]
fn as_array_ref_i32x8(self, a: &i32x8<Self>) -> &[i32; 8usize] {
&a.val.0
}
#[inline(always)]
fn as_array_mut_i32x8(self, a: &mut i32x8<Self>) -> &mut [i32; 8usize] {
&mut a.val.0
}
#[inline(always)]
fn store_array_i32x8(self, a: i32x8<Self>, dest: &mut [i32; 8usize]) {
*dest = a.val.0;
}
#[inline(always)]
fn cvt_from_bytes_i32x8(self, a: u8x32<Self>) -> i32x8<Self> {
unsafe {
i32x8 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_i32x8(self, a: i32x8<Self>) -> u8x32<Self> {
unsafe {
u8x32 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn slide_i32x8<const SHIFT: usize>(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
let mut dest = [Default::default(); 8usize];
dest[..8usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
dest[8usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
dest.simd_into(self)
}
#[inline(always)]
fn slide_within_blocks_i32x8<const SHIFT: usize>(
self,
a: i32x8<Self>,
b: i32x8<Self>,
) -> i32x8<Self> {
let (a0, a1) = self.split_i32x8(a);
let (b0, b1) = self.split_i32x8(b);
self.combine_i32x4(
self.slide_within_blocks_i32x4::<SHIFT>(a0, b0),
self.slide_within_blocks_i32x4::<SHIFT>(a1, b1),
)
}
#[inline(always)]
fn add_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
let (a0, a1) = self.split_i32x8(a);
let (b0, b1) = self.split_i32x8(b);
self.combine_i32x4(self.add_i32x4(a0, b0), self.add_i32x4(a1, b1))
}
#[inline(always)]
fn sub_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
let (a0, a1) = self.split_i32x8(a);
let (b0, b1) = self.split_i32x8(b);
self.combine_i32x4(self.sub_i32x4(a0, b0), self.sub_i32x4(a1, b1))
}
#[inline(always)]
fn mul_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
let (a0, a1) = self.split_i32x8(a);
let (b0, b1) = self.split_i32x8(b);
self.combine_i32x4(self.mul_i32x4(a0, b0), self.mul_i32x4(a1, b1))
}
#[inline(always)]
fn and_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
let (a0, a1) = self.split_i32x8(a);
let (b0, b1) = self.split_i32x8(b);
self.combine_i32x4(self.and_i32x4(a0, b0), self.and_i32x4(a1, b1))
}
#[inline(always)]
fn or_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
let (a0, a1) = self.split_i32x8(a);
let (b0, b1) = self.split_i32x8(b);
self.combine_i32x4(self.or_i32x4(a0, b0), self.or_i32x4(a1, b1))
}
#[inline(always)]
fn xor_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
let (a0, a1) = self.split_i32x8(a);
let (b0, b1) = self.split_i32x8(b);
self.combine_i32x4(self.xor_i32x4(a0, b0), self.xor_i32x4(a1, b1))
}
#[inline(always)]
fn not_i32x8(self, a: i32x8<Self>) -> i32x8<Self> {
let (a0, a1) = self.split_i32x8(a);
self.combine_i32x4(self.not_i32x4(a0), self.not_i32x4(a1))
}
#[inline(always)]
fn shl_i32x8(self, a: i32x8<Self>, shift: u32) -> i32x8<Self> {
let (a0, a1) = self.split_i32x8(a);
self.combine_i32x4(self.shl_i32x4(a0, shift), self.shl_i32x4(a1, shift))
}
#[inline(always)]
fn shlv_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
let (a0, a1) = self.split_i32x8(a);
let (b0, b1) = self.split_i32x8(b);
self.combine_i32x4(self.shlv_i32x4(a0, b0), self.shlv_i32x4(a1, b1))
}
#[inline(always)]
fn shr_i32x8(self, a: i32x8<Self>, shift: u32) -> i32x8<Self> {
let (a0, a1) = self.split_i32x8(a);
self.combine_i32x4(self.shr_i32x4(a0, shift), self.shr_i32x4(a1, shift))
}
#[inline(always)]
fn shrv_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
let (a0, a1) = self.split_i32x8(a);
let (b0, b1) = self.split_i32x8(b);
self.combine_i32x4(self.shrv_i32x4(a0, b0), self.shrv_i32x4(a1, b1))
}
#[inline(always)]
fn simd_eq_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> mask32x8<Self> {
let (a0, a1) = self.split_i32x8(a);
let (b0, b1) = self.split_i32x8(b);
self.combine_mask32x4(self.simd_eq_i32x4(a0, b0), self.simd_eq_i32x4(a1, b1))
}
#[inline(always)]
fn simd_lt_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> mask32x8<Self> {
let (a0, a1) = self.split_i32x8(a);
let (b0, b1) = self.split_i32x8(b);
self.combine_mask32x4(self.simd_lt_i32x4(a0, b0), self.simd_lt_i32x4(a1, b1))
}
#[inline(always)]
fn simd_le_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> mask32x8<Self> {
let (a0, a1) = self.split_i32x8(a);
let (b0, b1) = self.split_i32x8(b);
self.combine_mask32x4(self.simd_le_i32x4(a0, b0), self.simd_le_i32x4(a1, b1))
}
#[inline(always)]
fn simd_ge_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> mask32x8<Self> {
let (a0, a1) = self.split_i32x8(a);
let (b0, b1) = self.split_i32x8(b);
self.combine_mask32x4(self.simd_ge_i32x4(a0, b0), self.simd_ge_i32x4(a1, b1))
}
#[inline(always)]
fn simd_gt_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> mask32x8<Self> {
let (a0, a1) = self.split_i32x8(a);
let (b0, b1) = self.split_i32x8(b);
self.combine_mask32x4(self.simd_gt_i32x4(a0, b0), self.simd_gt_i32x4(a1, b1))
}
#[inline(always)]
fn zip_low_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
let (a0, _) = self.split_i32x8(a);
let (b0, _) = self.split_i32x8(b);
self.combine_i32x4(self.zip_low_i32x4(a0, b0), self.zip_high_i32x4(a0, b0))
}
#[inline(always)]
fn zip_high_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
let (_, a1) = self.split_i32x8(a);
let (_, b1) = self.split_i32x8(b);
self.combine_i32x4(self.zip_low_i32x4(a1, b1), self.zip_high_i32x4(a1, b1))
}
#[inline(always)]
fn unzip_low_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
let (a0, a1) = self.split_i32x8(a);
let (b0, b1) = self.split_i32x8(b);
self.combine_i32x4(self.unzip_low_i32x4(a0, a1), self.unzip_low_i32x4(b0, b1))
}
#[inline(always)]
fn unzip_high_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
let (a0, a1) = self.split_i32x8(a);
let (b0, b1) = self.split_i32x8(b);
self.combine_i32x4(self.unzip_high_i32x4(a0, a1), self.unzip_high_i32x4(b0, b1))
}
#[inline(always)]
fn select_i32x8(self, a: mask32x8<Self>, b: i32x8<Self>, c: i32x8<Self>) -> i32x8<Self> {
let (a0, a1) = self.split_mask32x8(a);
let (b0, b1) = self.split_i32x8(b);
let (c0, c1) = self.split_i32x8(c);
self.combine_i32x4(self.select_i32x4(a0, b0, c0), self.select_i32x4(a1, b1, c1))
}
#[inline(always)]
fn min_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
let (a0, a1) = self.split_i32x8(a);
let (b0, b1) = self.split_i32x8(b);
self.combine_i32x4(self.min_i32x4(a0, b0), self.min_i32x4(a1, b1))
}
#[inline(always)]
fn max_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
let (a0, a1) = self.split_i32x8(a);
let (b0, b1) = self.split_i32x8(b);
self.combine_i32x4(self.max_i32x4(a0, b0), self.max_i32x4(a1, b1))
}
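// `combine_*` and `split_*` are the backbone of this fallback level: every
// 256-bit operation above splits its operands into 128-bit halves, delegates
// to the corresponding 128-bit method, and stitches the results back
// together. The conversions themselves are plain array copies.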
#[inline(always)]
fn combine_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x16<Self> {
let mut result = [0; 16usize];
result[0..8usize].copy_from_slice(&a.val.0);
result[8usize..16usize].copy_from_slice(&b.val.0);
result.simd_into(self)
}
#[inline(always)]
fn split_i32x8(self, a: i32x8<Self>) -> (i32x4<Self>, i32x4<Self>) {
let mut b0 = [0; 4usize];
let mut b1 = [0; 4usize];
b0.copy_from_slice(&a.val.0[0..4usize]);
b1.copy_from_slice(&a.val.0[4usize..8usize]);
(b0.simd_into(self), b1.simd_into(self))
}
#[inline(always)]
fn neg_i32x8(self, a: i32x8<Self>) -> i32x8<Self> {
let (a0, a1) = self.split_i32x8(a);
self.combine_i32x4(self.neg_i32x4(a0), self.neg_i32x4(a1))
}
#[inline(always)]
fn reinterpret_u8_i32x8(self, a: i32x8<Self>) -> u8x32<Self> {
let (a0, a1) = self.split_i32x8(a);
self.combine_u8x16(self.reinterpret_u8_i32x4(a0), self.reinterpret_u8_i32x4(a1))
}
#[inline(always)]
fn reinterpret_u32_i32x8(self, a: i32x8<Self>) -> u32x8<Self> {
let (a0, a1) = self.split_i32x8(a);
self.combine_u32x4(
self.reinterpret_u32_i32x4(a0),
self.reinterpret_u32_i32x4(a1),
)
}
#[inline(always)]
fn cvt_f32_i32x8(self, a: i32x8<Self>) -> f32x8<Self> {
let (a0, a1) = self.split_i32x8(a);
self.combine_f32x4(self.cvt_f32_i32x4(a0), self.cvt_f32_i32x4(a1))
}
#[inline(always)]
fn splat_u32x8(self, val: u32) -> u32x8<Self> {
let half = self.splat_u32x4(val);
self.combine_u32x4(half, half)
}
#[inline(always)]
fn load_array_u32x8(self, val: [u32; 8usize]) -> u32x8<Self> {
u32x8 {
val: crate::support::Aligned256(val),
simd: self,
}
}
#[inline(always)]
fn load_array_ref_u32x8(self, val: &[u32; 8usize]) -> u32x8<Self> {
u32x8 {
val: crate::support::Aligned256(*val),
simd: self,
}
}
#[inline(always)]
fn as_array_u32x8(self, a: u32x8<Self>) -> [u32; 8usize] {
a.val.0
}
#[inline(always)]
fn as_array_ref_u32x8(self, a: &u32x8<Self>) -> &[u32; 8usize] {
&a.val.0
}
#[inline(always)]
fn as_array_mut_u32x8(self, a: &mut u32x8<Self>) -> &mut [u32; 8usize] {
&mut a.val.0
}
#[inline(always)]
fn store_array_u32x8(self, a: u32x8<Self>, dest: &mut [u32; 8usize]) {
*dest = a.val.0;
}
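// The byte conversions transmute between `Aligned256<[u8; 32]>` and
// `Aligned256<[u32; 8]>`: both wrappers have the same size and alignment and
// every bit pattern is valid for both element types. Note that the resulting
// lane values follow the target's endianness.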
#[inline(always)]
fn cvt_from_bytes_u32x8(self, a: u8x32<Self>) -> u32x8<Self> {
unsafe {
u32x8 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_u32x8(self, a: u32x8<Self>) -> u8x32<Self> {
unsafe {
u8x32 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
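/// Forms an 8-lane window into the concatenation of `a` then `b`, starting
/// at lane `SHIFT`. There is no compile-time guard here; the slice
/// arithmetic panics if `SHIFT` exceeds the lane count.
///
/// A minimal sketch using methods from this impl (values hypothetical):
///
/// ```ignore
/// let simd = Fallback::new();
/// let a = simd.load_array_u32x8([0, 1, 2, 3, 4, 5, 6, 7]);
/// let b = simd.load_array_u32x8([8, 9, 10, 11, 12, 13, 14, 15]);
/// let w = simd.slide_u32x8::<2>(a, b);
/// assert_eq!(simd.as_array_u32x8(w), [2, 3, 4, 5, 6, 7, 8, 9]);
/// ```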
#[inline(always)]
fn slide_u32x8<const SHIFT: usize>(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
let mut dest = [Default::default(); 8usize];
dest[..8usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
dest[8usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
dest.simd_into(self)
}
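// Unlike `slide_u32x8`, the `_within_blocks` variant slides each 128-bit
// block independently, which the per-half delegation below provides
// directly.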
#[inline(always)]
fn slide_within_blocks_u32x8<const SHIFT: usize>(
self,
a: u32x8<Self>,
b: u32x8<Self>,
) -> u32x8<Self> {
let (a0, a1) = self.split_u32x8(a);
let (b0, b1) = self.split_u32x8(b);
self.combine_u32x4(
self.slide_within_blocks_u32x4::<SHIFT>(a0, b0),
self.slide_within_blocks_u32x4::<SHIFT>(a1, b1),
)
}
#[inline(always)]
fn add_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
let (a0, a1) = self.split_u32x8(a);
let (b0, b1) = self.split_u32x8(b);
self.combine_u32x4(self.add_u32x4(a0, b0), self.add_u32x4(a1, b1))
}
#[inline(always)]
fn sub_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
let (a0, a1) = self.split_u32x8(a);
let (b0, b1) = self.split_u32x8(b);
self.combine_u32x4(self.sub_u32x4(a0, b0), self.sub_u32x4(a1, b1))
}
#[inline(always)]
fn mul_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
let (a0, a1) = self.split_u32x8(a);
let (b0, b1) = self.split_u32x8(b);
self.combine_u32x4(self.mul_u32x4(a0, b0), self.mul_u32x4(a1, b1))
}
#[inline(always)]
fn and_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
let (a0, a1) = self.split_u32x8(a);
let (b0, b1) = self.split_u32x8(b);
self.combine_u32x4(self.and_u32x4(a0, b0), self.and_u32x4(a1, b1))
}
#[inline(always)]
fn or_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
let (a0, a1) = self.split_u32x8(a);
let (b0, b1) = self.split_u32x8(b);
self.combine_u32x4(self.or_u32x4(a0, b0), self.or_u32x4(a1, b1))
}
#[inline(always)]
fn xor_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
let (a0, a1) = self.split_u32x8(a);
let (b0, b1) = self.split_u32x8(b);
self.combine_u32x4(self.xor_u32x4(a0, b0), self.xor_u32x4(a1, b1))
}
#[inline(always)]
fn not_u32x8(self, a: u32x8<Self>) -> u32x8<Self> {
let (a0, a1) = self.split_u32x8(a);
self.combine_u32x4(self.not_u32x4(a0), self.not_u32x4(a1))
}
#[inline(always)]
fn shl_u32x8(self, a: u32x8<Self>, shift: u32) -> u32x8<Self> {
let (a0, a1) = self.split_u32x8(a);
self.combine_u32x4(self.shl_u32x4(a0, shift), self.shl_u32x4(a1, shift))
}
#[inline(always)]
fn shlv_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
let (a0, a1) = self.split_u32x8(a);
let (b0, b1) = self.split_u32x8(b);
self.combine_u32x4(self.shlv_u32x4(a0, b0), self.shlv_u32x4(a1, b1))
}
#[inline(always)]
fn shr_u32x8(self, a: u32x8<Self>, shift: u32) -> u32x8<Self> {
let (a0, a1) = self.split_u32x8(a);
self.combine_u32x4(self.shr_u32x4(a0, shift), self.shr_u32x4(a1, shift))
}
#[inline(always)]
fn shrv_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
let (a0, a1) = self.split_u32x8(a);
let (b0, b1) = self.split_u32x8(b);
self.combine_u32x4(self.shrv_u32x4(a0, b0), self.shrv_u32x4(a1, b1))
}
#[inline(always)]
fn simd_eq_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> mask32x8<Self> {
let (a0, a1) = self.split_u32x8(a);
let (b0, b1) = self.split_u32x8(b);
self.combine_mask32x4(self.simd_eq_u32x4(a0, b0), self.simd_eq_u32x4(a1, b1))
}
#[inline(always)]
fn simd_lt_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> mask32x8<Self> {
let (a0, a1) = self.split_u32x8(a);
let (b0, b1) = self.split_u32x8(b);
self.combine_mask32x4(self.simd_lt_u32x4(a0, b0), self.simd_lt_u32x4(a1, b1))
}
#[inline(always)]
fn simd_le_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> mask32x8<Self> {
let (a0, a1) = self.split_u32x8(a);
let (b0, b1) = self.split_u32x8(b);
self.combine_mask32x4(self.simd_le_u32x4(a0, b0), self.simd_le_u32x4(a1, b1))
}
#[inline(always)]
fn simd_ge_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> mask32x8<Self> {
let (a0, a1) = self.split_u32x8(a);
let (b0, b1) = self.split_u32x8(b);
self.combine_mask32x4(self.simd_ge_u32x4(a0, b0), self.simd_ge_u32x4(a1, b1))
}
#[inline(always)]
fn simd_gt_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> mask32x8<Self> {
let (a0, a1) = self.split_u32x8(a);
let (b0, b1) = self.split_u32x8(b);
self.combine_mask32x4(self.simd_gt_u32x4(a0, b0), self.simd_gt_u32x4(a1, b1))
}
#[inline(always)]
fn zip_low_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
let (a0, _) = self.split_u32x8(a);
let (b0, _) = self.split_u32x8(b);
self.combine_u32x4(self.zip_low_u32x4(a0, b0), self.zip_high_u32x4(a0, b0))
}
#[inline(always)]
fn zip_high_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
let (_, a1) = self.split_u32x8(a);
let (_, b1) = self.split_u32x8(b);
self.combine_u32x4(self.zip_low_u32x4(a1, b1), self.zip_high_u32x4(a1, b1))
}
#[inline(always)]
fn unzip_low_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
let (a0, a1) = self.split_u32x8(a);
let (b0, b1) = self.split_u32x8(b);
self.combine_u32x4(self.unzip_low_u32x4(a0, a1), self.unzip_low_u32x4(b0, b1))
}
#[inline(always)]
fn unzip_high_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
let (a0, a1) = self.split_u32x8(a);
let (b0, b1) = self.split_u32x8(b);
self.combine_u32x4(self.unzip_high_u32x4(a0, a1), self.unzip_high_u32x4(b0, b1))
}
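/// Lane-wise blend: for each lane, picks `b` where the mask is set and `c`
/// otherwise.
///
/// A minimal sketch of a branch-free per-lane minimum, assuming `x` and `y`
/// are existing `u32x8<Fallback>` values:
///
/// ```ignore
/// let m = simd.simd_lt_u32x8(x, y); // lanes where x < y
/// let min = simd.select_u32x8(m, x, y);
/// ```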
#[inline(always)]
fn select_u32x8(self, a: mask32x8<Self>, b: u32x8<Self>, c: u32x8<Self>) -> u32x8<Self> {
let (a0, a1) = self.split_mask32x8(a);
let (b0, b1) = self.split_u32x8(b);
let (c0, c1) = self.split_u32x8(c);
self.combine_u32x4(self.select_u32x4(a0, b0, c0), self.select_u32x4(a1, b1, c1))
}
#[inline(always)]
fn min_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
let (a0, a1) = self.split_u32x8(a);
let (b0, b1) = self.split_u32x8(b);
self.combine_u32x4(self.min_u32x4(a0, b0), self.min_u32x4(a1, b1))
}
#[inline(always)]
fn max_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
let (a0, a1) = self.split_u32x8(a);
let (b0, b1) = self.split_u32x8(b);
self.combine_u32x4(self.max_u32x4(a0, b0), self.max_u32x4(a1, b1))
}
#[inline(always)]
fn combine_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x16<Self> {
let mut result = [0; 16usize];
result[0..8usize].copy_from_slice(&a.val.0);
result[8usize..16usize].copy_from_slice(&b.val.0);
result.simd_into(self)
}
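/// Splits the 256-bit vector into its low and high 128-bit halves; the exact
/// inverse of `combine_u32x4`. A round-trip sketch, given a token `simd` and
/// a hypothetical value `v`:
///
/// ```ignore
/// let (lo, hi) = simd.split_u32x8(v);
/// assert_eq!(
///     simd.as_array_u32x8(simd.combine_u32x4(lo, hi)),
///     simd.as_array_u32x8(v),
/// );
/// ```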
#[inline(always)]
fn split_u32x8(self, a: u32x8<Self>) -> (u32x4<Self>, u32x4<Self>) {
let mut b0 = [0; 4usize];
let mut b1 = [0; 4usize];
b0.copy_from_slice(&a.val.0[0..4usize]);
b1.copy_from_slice(&a.val.0[4usize..8usize]);
(b0.simd_into(self), b1.simd_into(self))
}
#[inline(always)]
fn reinterpret_u8_u32x8(self, a: u32x8<Self>) -> u8x32<Self> {
let (a0, a1) = self.split_u32x8(a);
self.combine_u8x16(self.reinterpret_u8_u32x4(a0), self.reinterpret_u8_u32x4(a1))
}
#[inline(always)]
fn cvt_f32_u32x8(self, a: u32x8<Self>) -> f32x8<Self> {
let (a0, a1) = self.split_u32x8(a);
self.combine_f32x4(self.cvt_f32_u32x4(a0), self.cvt_f32_u32x4(a1))
}
#[inline(always)]
fn splat_mask32x8(self, val: i32) -> mask32x8<Self> {
let half = self.splat_mask32x4(val);
self.combine_mask32x4(half, half)
}
#[inline(always)]
fn load_array_mask32x8(self, val: [i32; 8usize]) -> mask32x8<Self> {
mask32x8 {
val: crate::support::Aligned256(val),
simd: self,
}
}
#[inline(always)]
fn load_array_ref_mask32x8(self, val: &[i32; 8usize]) -> mask32x8<Self> {
mask32x8 {
val: crate::support::Aligned256(*val),
simd: self,
}
}
#[inline(always)]
fn as_array_mask32x8(self, a: mask32x8<Self>) -> [i32; 8usize] {
a.val.0
}
#[inline(always)]
fn as_array_ref_mask32x8(self, a: &mask32x8<Self>) -> &[i32; 8usize] {
&a.val.0
}
#[inline(always)]
fn as_array_mut_mask32x8(self, a: &mut mask32x8<Self>) -> &mut [i32; 8usize] {
&mut a.val.0
}
#[inline(always)]
fn store_array_mask32x8(self, a: mask32x8<Self>, dest: &mut [i32; 8usize]) {
*dest = a.val.0;
}
#[inline(always)]
fn cvt_from_bytes_mask32x8(self, a: u8x32<Self>) -> mask32x8<Self> {
unsafe {
mask32x8 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_mask32x8(self, a: mask32x8<Self>) -> u8x32<Self> {
unsafe {
u8x32 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn slide_mask32x8<const SHIFT: usize>(
self,
a: mask32x8<Self>,
b: mask32x8<Self>,
) -> mask32x8<Self> {
let mut dest = [Default::default(); 8usize];
dest[..8usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
dest[8usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
dest.simd_into(self)
}
#[inline(always)]
fn slide_within_blocks_mask32x8<const SHIFT: usize>(
self,
a: mask32x8<Self>,
b: mask32x8<Self>,
) -> mask32x8<Self> {
let (a0, a1) = self.split_mask32x8(a);
let (b0, b1) = self.split_mask32x8(b);
self.combine_mask32x4(
self.slide_within_blocks_mask32x4::<SHIFT>(a0, b0),
self.slide_within_blocks_mask32x4::<SHIFT>(a1, b1),
)
}
#[inline(always)]
fn and_mask32x8(self, a: mask32x8<Self>, b: mask32x8<Self>) -> mask32x8<Self> {
let (a0, a1) = self.split_mask32x8(a);
let (b0, b1) = self.split_mask32x8(b);
self.combine_mask32x4(self.and_mask32x4(a0, b0), self.and_mask32x4(a1, b1))
}
#[inline(always)]
fn or_mask32x8(self, a: mask32x8<Self>, b: mask32x8<Self>) -> mask32x8<Self> {
let (a0, a1) = self.split_mask32x8(a);
let (b0, b1) = self.split_mask32x8(b);
self.combine_mask32x4(self.or_mask32x4(a0, b0), self.or_mask32x4(a1, b1))
}
#[inline(always)]
fn xor_mask32x8(self, a: mask32x8<Self>, b: mask32x8<Self>) -> mask32x8<Self> {
let (a0, a1) = self.split_mask32x8(a);
let (b0, b1) = self.split_mask32x8(b);
self.combine_mask32x4(self.xor_mask32x4(a0, b0), self.xor_mask32x4(a1, b1))
}
#[inline(always)]
fn not_mask32x8(self, a: mask32x8<Self>) -> mask32x8<Self> {
let (a0, a1) = self.split_mask32x8(a);
self.combine_mask32x4(self.not_mask32x4(a0), self.not_mask32x4(a1))
}
#[inline(always)]
fn select_mask32x8(
self,
a: mask32x8<Self>,
b: mask32x8<Self>,
c: mask32x8<Self>,
) -> mask32x8<Self> {
let (a0, a1) = self.split_mask32x8(a);
let (b0, b1) = self.split_mask32x8(b);
let (c0, c1) = self.split_mask32x8(c);
self.combine_mask32x4(
self.select_mask32x4(a0, b0, c0),
self.select_mask32x4(a1, b1, c1),
)
}
#[inline(always)]
fn simd_eq_mask32x8(self, a: mask32x8<Self>, b: mask32x8<Self>) -> mask32x8<Self> {
let (a0, a1) = self.split_mask32x8(a);
let (b0, b1) = self.split_mask32x8(b);
self.combine_mask32x4(self.simd_eq_mask32x4(a0, b0), self.simd_eq_mask32x4(a1, b1))
}
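// The boolean reductions split once and short-circuit: `any_*` skips the
// high half when the low half already answers the question, and `all_*`
// likewise via `&&`.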
#[inline(always)]
fn any_true_mask32x8(self, a: mask32x8<Self>) -> bool {
let (a0, a1) = self.split_mask32x8(a);
self.any_true_mask32x4(a0) || self.any_true_mask32x4(a1)
}
#[inline(always)]
fn all_true_mask32x8(self, a: mask32x8<Self>) -> bool {
let (a0, a1) = self.split_mask32x8(a);
self.all_true_mask32x4(a0) && self.all_true_mask32x4(a1)
}
#[inline(always)]
fn any_false_mask32x8(self, a: mask32x8<Self>) -> bool {
let (a0, a1) = self.split_mask32x8(a);
self.any_false_mask32x4(a0) || self.any_false_mask32x4(a1)
}
#[inline(always)]
fn all_false_mask32x8(self, a: mask32x8<Self>) -> bool {
let (a0, a1) = self.split_mask32x8(a);
self.all_false_mask32x4(a0) && self.all_false_mask32x4(a1)
}
#[inline(always)]
fn combine_mask32x8(self, a: mask32x8<Self>, b: mask32x8<Self>) -> mask32x16<Self> {
let mut result = [0; 16usize];
result[0..8usize].copy_from_slice(&a.val.0);
result[8usize..16usize].copy_from_slice(&b.val.0);
result.simd_into(self)
}
#[inline(always)]
fn split_mask32x8(self, a: mask32x8<Self>) -> (mask32x4<Self>, mask32x4<Self>) {
let mut b0 = [0; 4usize];
let mut b1 = [0; 4usize];
b0.copy_from_slice(&a.val.0[0..4usize]);
b1.copy_from_slice(&a.val.0[4usize..8usize]);
(b0.simd_into(self), b1.simd_into(self))
}
#[inline(always)]
fn splat_f64x4(self, val: f64) -> f64x4<Self> {
let half = self.splat_f64x2(val);
self.combine_f64x2(half, half)
}
#[inline(always)]
fn load_array_f64x4(self, val: [f64; 4usize]) -> f64x4<Self> {
f64x4 {
val: crate::support::Aligned256(val),
simd: self,
}
}
#[inline(always)]
fn load_array_ref_f64x4(self, val: &[f64; 4usize]) -> f64x4<Self> {
f64x4 {
val: crate::support::Aligned256(*val),
simd: self,
}
}
#[inline(always)]
fn as_array_f64x4(self, a: f64x4<Self>) -> [f64; 4usize] {
a.val.0
}
#[inline(always)]
fn as_array_ref_f64x4(self, a: &f64x4<Self>) -> &[f64; 4usize] {
&a.val.0
}
#[inline(always)]
fn as_array_mut_f64x4(self, a: &mut f64x4<Self>) -> &mut [f64; 4usize] {
&mut a.val.0
}
#[inline(always)]
fn store_array_f64x4(self, a: f64x4<Self>, dest: &mut [f64; 4usize]) {
*dest = a.val.0;
}
#[inline(always)]
fn cvt_from_bytes_f64x4(self, a: u8x32<Self>) -> f64x4<Self> {
unsafe {
f64x4 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_f64x4(self, a: f64x4<Self>) -> u8x32<Self> {
unsafe {
u8x32 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn slide_f64x4<const SHIFT: usize>(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
let mut dest = [Default::default(); 4usize];
dest[..4usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
dest[4usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
dest.simd_into(self)
}
#[inline(always)]
fn slide_within_blocks_f64x4<const SHIFT: usize>(
self,
a: f64x4<Self>,
b: f64x4<Self>,
) -> f64x4<Self> {
let (a0, a1) = self.split_f64x4(a);
let (b0, b1) = self.split_f64x4(b);
self.combine_f64x2(
self.slide_within_blocks_f64x2::<SHIFT>(a0, b0),
self.slide_within_blocks_f64x2::<SHIFT>(a1, b1),
)
}
#[inline(always)]
fn abs_f64x4(self, a: f64x4<Self>) -> f64x4<Self> {
let (a0, a1) = self.split_f64x4(a);
self.combine_f64x2(self.abs_f64x2(a0), self.abs_f64x2(a1))
}
#[inline(always)]
fn neg_f64x4(self, a: f64x4<Self>) -> f64x4<Self> {
let (a0, a1) = self.split_f64x4(a);
self.combine_f64x2(self.neg_f64x2(a0), self.neg_f64x2(a1))
}
#[inline(always)]
fn sqrt_f64x4(self, a: f64x4<Self>) -> f64x4<Self> {
let (a0, a1) = self.split_f64x4(a);
self.combine_f64x2(self.sqrt_f64x2(a0), self.sqrt_f64x2(a1))
}
#[inline(always)]
fn add_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
let (a0, a1) = self.split_f64x4(a);
let (b0, b1) = self.split_f64x4(b);
self.combine_f64x2(self.add_f64x2(a0, b0), self.add_f64x2(a1, b1))
}
#[inline(always)]
fn sub_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
let (a0, a1) = self.split_f64x4(a);
let (b0, b1) = self.split_f64x4(b);
self.combine_f64x2(self.sub_f64x2(a0, b0), self.sub_f64x2(a1, b1))
}
#[inline(always)]
fn mul_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
let (a0, a1) = self.split_f64x4(a);
let (b0, b1) = self.split_f64x4(b);
self.combine_f64x2(self.mul_f64x2(a0, b0), self.mul_f64x2(a1, b1))
}
#[inline(always)]
fn div_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
let (a0, a1) = self.split_f64x4(a);
let (b0, b1) = self.split_f64x4(b);
self.combine_f64x2(self.div_f64x2(a0, b0), self.div_f64x2(a1, b1))
}
#[inline(always)]
fn copysign_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
let (a0, a1) = self.split_f64x4(a);
let (b0, b1) = self.split_f64x4(b);
self.combine_f64x2(self.copysign_f64x2(a0, b0), self.copysign_f64x2(a1, b1))
}
#[inline(always)]
fn simd_eq_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> mask64x4<Self> {
let (a0, a1) = self.split_f64x4(a);
let (b0, b1) = self.split_f64x4(b);
self.combine_mask64x2(self.simd_eq_f64x2(a0, b0), self.simd_eq_f64x2(a1, b1))
}
#[inline(always)]
fn simd_lt_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> mask64x4<Self> {
let (a0, a1) = self.split_f64x4(a);
let (b0, b1) = self.split_f64x4(b);
self.combine_mask64x2(self.simd_lt_f64x2(a0, b0), self.simd_lt_f64x2(a1, b1))
}
#[inline(always)]
fn simd_le_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> mask64x4<Self> {
let (a0, a1) = self.split_f64x4(a);
let (b0, b1) = self.split_f64x4(b);
self.combine_mask64x2(self.simd_le_f64x2(a0, b0), self.simd_le_f64x2(a1, b1))
}
#[inline(always)]
fn simd_ge_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> mask64x4<Self> {
let (a0, a1) = self.split_f64x4(a);
let (b0, b1) = self.split_f64x4(b);
self.combine_mask64x2(self.simd_ge_f64x2(a0, b0), self.simd_ge_f64x2(a1, b1))
}
#[inline(always)]
fn simd_gt_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> mask64x4<Self> {
let (a0, a1) = self.split_f64x4(a);
let (b0, b1) = self.split_f64x4(b);
self.combine_mask64x2(self.simd_gt_f64x2(a0, b0), self.simd_gt_f64x2(a1, b1))
}
#[inline(always)]
fn zip_low_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
let (a0, _) = self.split_f64x4(a);
let (b0, _) = self.split_f64x4(b);
self.combine_f64x2(self.zip_low_f64x2(a0, b0), self.zip_high_f64x2(a0, b0))
}
#[inline(always)]
fn zip_high_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
let (_, a1) = self.split_f64x4(a);
let (_, b1) = self.split_f64x4(b);
self.combine_f64x2(self.zip_low_f64x2(a1, b1), self.zip_high_f64x2(a1, b1))
}
#[inline(always)]
fn unzip_low_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
let (a0, a1) = self.split_f64x4(a);
let (b0, b1) = self.split_f64x4(b);
self.combine_f64x2(self.unzip_low_f64x2(a0, a1), self.unzip_low_f64x2(b0, b1))
}
#[inline(always)]
fn unzip_high_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
let (a0, a1) = self.split_f64x4(a);
let (b0, b1) = self.split_f64x4(b);
self.combine_f64x2(self.unzip_high_f64x2(a0, a1), self.unzip_high_f64x2(b0, b1))
}
#[inline(always)]
fn max_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
let (a0, a1) = self.split_f64x4(a);
let (b0, b1) = self.split_f64x4(b);
self.combine_f64x2(self.max_f64x2(a0, b0), self.max_f64x2(a1, b1))
}
#[inline(always)]
fn min_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
let (a0, a1) = self.split_f64x4(a);
let (b0, b1) = self.split_f64x4(b);
self.combine_f64x2(self.min_f64x2(a0, b0), self.min_f64x2(a1, b1))
}
#[inline(always)]
fn max_precise_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
let (a0, a1) = self.split_f64x4(a);
let (b0, b1) = self.split_f64x4(b);
self.combine_f64x2(
self.max_precise_f64x2(a0, b0),
self.max_precise_f64x2(a1, b1),
)
}
#[inline(always)]
fn min_precise_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
let (a0, a1) = self.split_f64x4(a);
let (b0, b1) = self.split_f64x4(b);
self.combine_f64x2(
self.min_precise_f64x2(a0, b0),
self.min_precise_f64x2(a1, b1),
)
}
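// `mul_add` computes `a * b + c` per lane by delegating to the 2-lane
// primitive; whether the intermediate product is rounded (separate
// multiply-add) or fused is inherited from that primitive.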
#[inline(always)]
fn mul_add_f64x4(self, a: f64x4<Self>, b: f64x4<Self>, c: f64x4<Self>) -> f64x4<Self> {
let (a0, a1) = self.split_f64x4(a);
let (b0, b1) = self.split_f64x4(b);
let (c0, c1) = self.split_f64x4(c);
self.combine_f64x2(
self.mul_add_f64x2(a0, b0, c0),
self.mul_add_f64x2(a1, b1, c1),
)
}
#[inline(always)]
fn mul_sub_f64x4(self, a: f64x4<Self>, b: f64x4<Self>, c: f64x4<Self>) -> f64x4<Self> {
let (a0, a1) = self.split_f64x4(a);
let (b0, b1) = self.split_f64x4(b);
let (c0, c1) = self.split_f64x4(c);
self.combine_f64x2(
self.mul_sub_f64x2(a0, b0, c0),
self.mul_sub_f64x2(a1, b1, c1),
)
}
#[inline(always)]
fn floor_f64x4(self, a: f64x4<Self>) -> f64x4<Self> {
let (a0, a1) = self.split_f64x4(a);
self.combine_f64x2(self.floor_f64x2(a0), self.floor_f64x2(a1))
}
#[inline(always)]
fn ceil_f64x4(self, a: f64x4<Self>) -> f64x4<Self> {
let (a0, a1) = self.split_f64x4(a);
self.combine_f64x2(self.ceil_f64x2(a0), self.ceil_f64x2(a1))
}
#[inline(always)]
fn round_ties_even_f64x4(self, a: f64x4<Self>) -> f64x4<Self> {
let (a0, a1) = self.split_f64x4(a);
self.combine_f64x2(
self.round_ties_even_f64x2(a0),
self.round_ties_even_f64x2(a1),
)
}
#[inline(always)]
fn fract_f64x4(self, a: f64x4<Self>) -> f64x4<Self> {
let (a0, a1) = self.split_f64x4(a);
self.combine_f64x2(self.fract_f64x2(a0), self.fract_f64x2(a1))
}
#[inline(always)]
fn trunc_f64x4(self, a: f64x4<Self>) -> f64x4<Self> {
let (a0, a1) = self.split_f64x4(a);
self.combine_f64x2(self.trunc_f64x2(a0), self.trunc_f64x2(a1))
}
#[inline(always)]
fn select_f64x4(self, a: mask64x4<Self>, b: f64x4<Self>, c: f64x4<Self>) -> f64x4<Self> {
let (a0, a1) = self.split_mask64x4(a);
let (b0, b1) = self.split_f64x4(b);
let (c0, c1) = self.split_f64x4(c);
self.combine_f64x2(self.select_f64x2(a0, b0, c0), self.select_f64x2(a1, b1, c1))
}
#[inline(always)]
fn combine_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x8<Self> {
let mut result = [0.0; 8usize];
result[0..4usize].copy_from_slice(&a.val.0);
result[4usize..8usize].copy_from_slice(&b.val.0);
result.simd_into(self)
}
#[inline(always)]
fn split_f64x4(self, a: f64x4<Self>) -> (f64x2<Self>, f64x2<Self>) {
let mut b0 = [0.0; 2usize];
let mut b1 = [0.0; 2usize];
b0.copy_from_slice(&a.val.0[0..2usize]);
b1.copy_from_slice(&a.val.0[2usize..4usize]);
(b0.simd_into(self), b1.simd_into(self))
}
#[inline(always)]
fn reinterpret_f32_f64x4(self, a: f64x4<Self>) -> f32x8<Self> {
let (a0, a1) = self.split_f64x4(a);
self.combine_f32x4(
self.reinterpret_f32_f64x2(a0),
self.reinterpret_f32_f64x2(a1),
)
}
#[inline(always)]
fn splat_mask64x4(self, val: i64) -> mask64x4<Self> {
let half = self.splat_mask64x2(val);
self.combine_mask64x2(half, half)
}
#[inline(always)]
fn load_array_mask64x4(self, val: [i64; 4usize]) -> mask64x4<Self> {
mask64x4 {
val: crate::support::Aligned256(val),
simd: self,
}
}
#[inline(always)]
fn load_array_ref_mask64x4(self, val: &[i64; 4usize]) -> mask64x4<Self> {
mask64x4 {
val: crate::support::Aligned256(*val),
simd: self,
}
}
#[inline(always)]
fn as_array_mask64x4(self, a: mask64x4<Self>) -> [i64; 4usize] {
a.val.0
}
#[inline(always)]
fn as_array_ref_mask64x4(self, a: &mask64x4<Self>) -> &[i64; 4usize] {
&a.val.0
}
#[inline(always)]
fn as_array_mut_mask64x4(self, a: &mut mask64x4<Self>) -> &mut [i64; 4usize] {
&mut a.val.0
}
#[inline(always)]
fn store_array_mask64x4(self, a: mask64x4<Self>, dest: &mut [i64; 4usize]) {
*dest = a.val.0;
}
#[inline(always)]
fn cvt_from_bytes_mask64x4(self, a: u8x32<Self>) -> mask64x4<Self> {
unsafe {
mask64x4 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_mask64x4(self, a: mask64x4<Self>) -> u8x32<Self> {
unsafe {
u8x32 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn slide_mask64x4<const SHIFT: usize>(
self,
a: mask64x4<Self>,
b: mask64x4<Self>,
) -> mask64x4<Self> {
let mut dest = [Default::default(); 4usize];
dest[..4usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
dest[4usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
dest.simd_into(self)
}
#[inline(always)]
fn slide_within_blocks_mask64x4<const SHIFT: usize>(
self,
a: mask64x4<Self>,
b: mask64x4<Self>,
) -> mask64x4<Self> {
let (a0, a1) = self.split_mask64x4(a);
let (b0, b1) = self.split_mask64x4(b);
self.combine_mask64x2(
self.slide_within_blocks_mask64x2::<SHIFT>(a0, b0),
self.slide_within_blocks_mask64x2::<SHIFT>(a1, b1),
)
}
#[inline(always)]
fn and_mask64x4(self, a: mask64x4<Self>, b: mask64x4<Self>) -> mask64x4<Self> {
let (a0, a1) = self.split_mask64x4(a);
let (b0, b1) = self.split_mask64x4(b);
self.combine_mask64x2(self.and_mask64x2(a0, b0), self.and_mask64x2(a1, b1))
}
#[inline(always)]
fn or_mask64x4(self, a: mask64x4<Self>, b: mask64x4<Self>) -> mask64x4<Self> {
let (a0, a1) = self.split_mask64x4(a);
let (b0, b1) = self.split_mask64x4(b);
self.combine_mask64x2(self.or_mask64x2(a0, b0), self.or_mask64x2(a1, b1))
}
#[inline(always)]
fn xor_mask64x4(self, a: mask64x4<Self>, b: mask64x4<Self>) -> mask64x4<Self> {
let (a0, a1) = self.split_mask64x4(a);
let (b0, b1) = self.split_mask64x4(b);
self.combine_mask64x2(self.xor_mask64x2(a0, b0), self.xor_mask64x2(a1, b1))
}
#[inline(always)]
fn not_mask64x4(self, a: mask64x4<Self>) -> mask64x4<Self> {
let (a0, a1) = self.split_mask64x4(a);
self.combine_mask64x2(self.not_mask64x2(a0), self.not_mask64x2(a1))
}
#[inline(always)]
fn select_mask64x4(
self,
a: mask64x4<Self>,
b: mask64x4<Self>,
c: mask64x4<Self>,
) -> mask64x4<Self> {
let (a0, a1) = self.split_mask64x4(a);
let (b0, b1) = self.split_mask64x4(b);
let (c0, c1) = self.split_mask64x4(c);
self.combine_mask64x2(
self.select_mask64x2(a0, b0, c0),
self.select_mask64x2(a1, b1, c1),
)
}
#[inline(always)]
fn simd_eq_mask64x4(self, a: mask64x4<Self>, b: mask64x4<Self>) -> mask64x4<Self> {
let (a0, a1) = self.split_mask64x4(a);
let (b0, b1) = self.split_mask64x4(b);
self.combine_mask64x2(self.simd_eq_mask64x2(a0, b0), self.simd_eq_mask64x2(a1, b1))
}
#[inline(always)]
fn any_true_mask64x4(self, a: mask64x4<Self>) -> bool {
let (a0, a1) = self.split_mask64x4(a);
self.any_true_mask64x2(a0) || self.any_true_mask64x2(a1)
}
#[inline(always)]
fn all_true_mask64x4(self, a: mask64x4<Self>) -> bool {
let (a0, a1) = self.split_mask64x4(a);
self.all_true_mask64x2(a0) && self.all_true_mask64x2(a1)
}
#[inline(always)]
fn any_false_mask64x4(self, a: mask64x4<Self>) -> bool {
let (a0, a1) = self.split_mask64x4(a);
self.any_false_mask64x2(a0) || self.any_false_mask64x2(a1)
}
#[inline(always)]
fn all_false_mask64x4(self, a: mask64x4<Self>) -> bool {
let (a0, a1) = self.split_mask64x4(a);
self.all_false_mask64x2(a0) && self.all_false_mask64x2(a1)
}
#[inline(always)]
fn combine_mask64x4(self, a: mask64x4<Self>, b: mask64x4<Self>) -> mask64x8<Self> {
let mut result = [0; 8usize];
result[0..4usize].copy_from_slice(&a.val.0);
result[4usize..8usize].copy_from_slice(&b.val.0);
result.simd_into(self)
}
#[inline(always)]
fn split_mask64x4(self, a: mask64x4<Self>) -> (mask64x2<Self>, mask64x2<Self>) {
let mut b0 = [0; 2usize];
let mut b1 = [0; 2usize];
b0.copy_from_slice(&a.val.0[0..2usize]);
b1.copy_from_slice(&a.val.0[2usize..4usize]);
(b0.simd_into(self), b1.simd_into(self))
}
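// 512-bit section: the same split/combine recipe, one level up. Each f32x16
// operation delegates to two f32x8 calls, which in turn delegate to f32x4,
// so the fallback ultimately bottoms out in the 128-bit implementations.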
#[inline(always)]
fn splat_f32x16(self, val: f32) -> f32x16<Self> {
let half = self.splat_f32x8(val);
self.combine_f32x8(half, half)
}
#[inline(always)]
fn load_array_f32x16(self, val: [f32; 16usize]) -> f32x16<Self> {
f32x16 {
val: crate::support::Aligned512(val),
simd: self,
}
}
#[inline(always)]
fn load_array_ref_f32x16(self, val: &[f32; 16usize]) -> f32x16<Self> {
f32x16 {
val: crate::support::Aligned512(*val),
simd: self,
}
}
#[inline(always)]
fn as_array_f32x16(self, a: f32x16<Self>) -> [f32; 16usize] {
a.val.0
}
#[inline(always)]
fn as_array_ref_f32x16(self, a: &f32x16<Self>) -> &[f32; 16usize] {
&a.val.0
}
#[inline(always)]
fn as_array_mut_f32x16(self, a: &mut f32x16<Self>) -> &mut [f32; 16usize] {
&mut a.val.0
}
#[inline(always)]
fn store_array_f32x16(self, a: f32x16<Self>, dest: &mut [f32; 16usize]) {
*dest = a.val.0;
}
#[inline(always)]
fn cvt_from_bytes_f32x16(self, a: u8x64<Self>) -> f32x16<Self> {
unsafe {
f32x16 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_f32x16(self, a: f32x16<Self>) -> u8x64<Self> {
unsafe {
u8x64 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn slide_f32x16<const SHIFT: usize>(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
let mut dest = [Default::default(); 16usize];
dest[..16usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
dest[16usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
dest.simd_into(self)
}
#[inline(always)]
fn slide_within_blocks_f32x16<const SHIFT: usize>(
self,
a: f32x16<Self>,
b: f32x16<Self>,
) -> f32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
let (b0, b1) = self.split_f32x16(b);
self.combine_f32x8(
self.slide_within_blocks_f32x8::<SHIFT>(a0, b0),
self.slide_within_blocks_f32x8::<SHIFT>(a1, b1),
)
}
#[inline(always)]
fn abs_f32x16(self, a: f32x16<Self>) -> f32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
self.combine_f32x8(self.abs_f32x8(a0), self.abs_f32x8(a1))
}
#[inline(always)]
fn neg_f32x16(self, a: f32x16<Self>) -> f32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
self.combine_f32x8(self.neg_f32x8(a0), self.neg_f32x8(a1))
}
#[inline(always)]
fn sqrt_f32x16(self, a: f32x16<Self>) -> f32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
self.combine_f32x8(self.sqrt_f32x8(a0), self.sqrt_f32x8(a1))
}
#[inline(always)]
fn add_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
let (b0, b1) = self.split_f32x16(b);
self.combine_f32x8(self.add_f32x8(a0, b0), self.add_f32x8(a1, b1))
}
#[inline(always)]
fn sub_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
let (b0, b1) = self.split_f32x16(b);
self.combine_f32x8(self.sub_f32x8(a0, b0), self.sub_f32x8(a1, b1))
}
#[inline(always)]
fn mul_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
let (b0, b1) = self.split_f32x16(b);
self.combine_f32x8(self.mul_f32x8(a0, b0), self.mul_f32x8(a1, b1))
}
#[inline(always)]
fn div_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
let (b0, b1) = self.split_f32x16(b);
self.combine_f32x8(self.div_f32x8(a0, b0), self.div_f32x8(a1, b1))
}
#[inline(always)]
fn copysign_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
let (b0, b1) = self.split_f32x16(b);
self.combine_f32x8(self.copysign_f32x8(a0, b0), self.copysign_f32x8(a1, b1))
}
#[inline(always)]
fn simd_eq_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> mask32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
let (b0, b1) = self.split_f32x16(b);
self.combine_mask32x8(self.simd_eq_f32x8(a0, b0), self.simd_eq_f32x8(a1, b1))
}
#[inline(always)]
fn simd_lt_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> mask32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
let (b0, b1) = self.split_f32x16(b);
self.combine_mask32x8(self.simd_lt_f32x8(a0, b0), self.simd_lt_f32x8(a1, b1))
}
#[inline(always)]
fn simd_le_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> mask32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
let (b0, b1) = self.split_f32x16(b);
self.combine_mask32x8(self.simd_le_f32x8(a0, b0), self.simd_le_f32x8(a1, b1))
}
#[inline(always)]
fn simd_ge_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> mask32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
let (b0, b1) = self.split_f32x16(b);
self.combine_mask32x8(self.simd_ge_f32x8(a0, b0), self.simd_ge_f32x8(a1, b1))
}
#[inline(always)]
fn simd_gt_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> mask32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
let (b0, b1) = self.split_f32x16(b);
self.combine_mask32x8(self.simd_gt_f32x8(a0, b0), self.simd_gt_f32x8(a1, b1))
}
#[inline(always)]
fn zip_low_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
let (a0, _) = self.split_f32x16(a);
let (b0, _) = self.split_f32x16(b);
self.combine_f32x8(self.zip_low_f32x8(a0, b0), self.zip_high_f32x8(a0, b0))
}
#[inline(always)]
fn zip_high_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
let (_, a1) = self.split_f32x16(a);
let (_, b1) = self.split_f32x16(b);
self.combine_f32x8(self.zip_low_f32x8(a1, b1), self.zip_high_f32x8(a1, b1))
}
#[inline(always)]
fn unzip_low_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
let (b0, b1) = self.split_f32x16(b);
self.combine_f32x8(self.unzip_low_f32x8(a0, a1), self.unzip_low_f32x8(b0, b1))
}
#[inline(always)]
fn unzip_high_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
let (b0, b1) = self.split_f32x16(b);
self.combine_f32x8(self.unzip_high_f32x8(a0, a1), self.unzip_high_f32x8(b0, b1))
}
#[inline(always)]
fn max_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
let (b0, b1) = self.split_f32x16(b);
self.combine_f32x8(self.max_f32x8(a0, b0), self.max_f32x8(a1, b1))
}
#[inline(always)]
fn min_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
let (b0, b1) = self.split_f32x16(b);
self.combine_f32x8(self.min_f32x8(a0, b0), self.min_f32x8(a1, b1))
}
#[inline(always)]
fn max_precise_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
let (b0, b1) = self.split_f32x16(b);
self.combine_f32x8(
self.max_precise_f32x8(a0, b0),
self.max_precise_f32x8(a1, b1),
)
}
#[inline(always)]
fn min_precise_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
let (b0, b1) = self.split_f32x16(b);
self.combine_f32x8(
self.min_precise_f32x8(a0, b0),
self.min_precise_f32x8(a1, b1),
)
}
#[inline(always)]
fn mul_add_f32x16(self, a: f32x16<Self>, b: f32x16<Self>, c: f32x16<Self>) -> f32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
let (b0, b1) = self.split_f32x16(b);
let (c0, c1) = self.split_f32x16(c);
self.combine_f32x8(
self.mul_add_f32x8(a0, b0, c0),
self.mul_add_f32x8(a1, b1, c1),
)
}
#[inline(always)]
fn mul_sub_f32x16(self, a: f32x16<Self>, b: f32x16<Self>, c: f32x16<Self>) -> f32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
let (b0, b1) = self.split_f32x16(b);
let (c0, c1) = self.split_f32x16(c);
self.combine_f32x8(
self.mul_sub_f32x8(a0, b0, c0),
self.mul_sub_f32x8(a1, b1, c1),
)
}
#[inline(always)]
fn floor_f32x16(self, a: f32x16<Self>) -> f32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
self.combine_f32x8(self.floor_f32x8(a0), self.floor_f32x8(a1))
}
#[inline(always)]
fn ceil_f32x16(self, a: f32x16<Self>) -> f32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
self.combine_f32x8(self.ceil_f32x8(a0), self.ceil_f32x8(a1))
}
#[inline(always)]
fn round_ties_even_f32x16(self, a: f32x16<Self>) -> f32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
self.combine_f32x8(
self.round_ties_even_f32x8(a0),
self.round_ties_even_f32x8(a1),
)
}
#[inline(always)]
fn fract_f32x16(self, a: f32x16<Self>) -> f32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
self.combine_f32x8(self.fract_f32x8(a0), self.fract_f32x8(a1))
}
#[inline(always)]
fn trunc_f32x16(self, a: f32x16<Self>) -> f32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
self.combine_f32x8(self.trunc_f32x8(a0), self.trunc_f32x8(a1))
}
#[inline(always)]
fn select_f32x16(self, a: mask32x16<Self>, b: f32x16<Self>, c: f32x16<Self>) -> f32x16<Self> {
let (a0, a1) = self.split_mask32x16(a);
let (b0, b1) = self.split_f32x16(b);
let (c0, c1) = self.split_f32x16(c);
self.combine_f32x8(self.select_f32x8(a0, b0, c0), self.select_f32x8(a1, b1, c1))
}
#[inline(always)]
fn split_f32x16(self, a: f32x16<Self>) -> (f32x8<Self>, f32x8<Self>) {
let mut b0 = [0.0; 8usize];
let mut b1 = [0.0; 8usize];
b0.copy_from_slice(&a.val.0[0..8usize]);
b1.copy_from_slice(&a.val.0[8usize..16usize]);
(b0.simd_into(self), b1.simd_into(self))
}
#[inline(always)]
fn reinterpret_f64_f32x16(self, a: f32x16<Self>) -> f64x8<Self> {
let (a0, a1) = self.split_f32x16(a);
self.combine_f64x4(
self.reinterpret_f64_f32x8(a0),
self.reinterpret_f64_f32x8(a1),
)
}
#[inline(always)]
fn reinterpret_i32_f32x16(self, a: f32x16<Self>) -> i32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
self.combine_i32x8(
self.reinterpret_i32_f32x8(a0),
self.reinterpret_i32_f32x8(a1),
)
}
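// Gathers lane `i` from `src[(4 * i) % 15]` (lane 15 stays put), i.e. it
// transposes the four 128-bit rows of `src` viewed as a 4x4 matrix of f32
// lanes. Since 4 * 4 == 16 == 1 (mod 15), this permutation is its own
// inverse, which is why `store_interleaved_128_f32x16` below uses the very
// same index pattern.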
#[inline(always)]
fn load_interleaved_128_f32x16(self, src: &[f32; 16usize]) -> f32x16<Self> {
[
src[0usize],
src[4usize],
src[8usize],
src[12usize],
src[1usize],
src[5usize],
src[9usize],
src[13usize],
src[2usize],
src[6usize],
src[10usize],
src[14usize],
src[3usize],
src[7usize],
src[11usize],
src[15usize],
]
.simd_into(self)
}
#[inline(always)]
fn store_interleaved_128_f32x16(self, a: f32x16<Self>, dest: &mut [f32; 16usize]) {
*dest = [
a[0usize], a[4usize], a[8usize], a[12usize], a[1usize], a[5usize], a[9usize],
a[13usize], a[2usize], a[6usize], a[10usize], a[14usize], a[3usize], a[7usize],
a[11usize], a[15usize],
];
}
#[inline(always)]
fn reinterpret_u8_f32x16(self, a: f32x16<Self>) -> u8x64<Self> {
let (a0, a1) = self.split_f32x16(a);
self.combine_u8x32(self.reinterpret_u8_f32x8(a0), self.reinterpret_u8_f32x8(a1))
}
#[inline(always)]
fn reinterpret_u32_f32x16(self, a: f32x16<Self>) -> u32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
self.combine_u32x8(
self.reinterpret_u32_f32x8(a0),
self.reinterpret_u32_f32x8(a1),
)
}
#[inline(always)]
fn cvt_u32_f32x16(self, a: f32x16<Self>) -> u32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
self.combine_u32x8(self.cvt_u32_f32x8(a0), self.cvt_u32_f32x8(a1))
}
#[inline(always)]
fn cvt_u32_precise_f32x16(self, a: f32x16<Self>) -> u32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
self.combine_u32x8(
self.cvt_u32_precise_f32x8(a0),
self.cvt_u32_precise_f32x8(a1),
)
}
#[inline(always)]
fn cvt_i32_f32x16(self, a: f32x16<Self>) -> i32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
self.combine_i32x8(self.cvt_i32_f32x8(a0), self.cvt_i32_f32x8(a1))
}
#[inline(always)]
fn cvt_i32_precise_f32x16(self, a: f32x16<Self>) -> i32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
self.combine_i32x8(
self.cvt_i32_precise_f32x8(a0),
self.cvt_i32_precise_f32x8(a1),
)
}
#[inline(always)]
fn splat_i8x64(self, val: i8) -> i8x64<Self> {
let half = self.splat_i8x32(val);
self.combine_i8x32(half, half)
}
#[inline(always)]
fn load_array_i8x64(self, val: [i8; 64usize]) -> i8x64<Self> {
i8x64 {
val: crate::support::Aligned512(val),
simd: self,
}
}
#[inline(always)]
fn load_array_ref_i8x64(self, val: &[i8; 64usize]) -> i8x64<Self> {
i8x64 {
val: crate::support::Aligned512(*val),
simd: self,
}
}
#[inline(always)]
fn as_array_i8x64(self, a: i8x64<Self>) -> [i8; 64usize] {
a.val.0
}
#[inline(always)]
fn as_array_ref_i8x64(self, a: &i8x64<Self>) -> &[i8; 64usize] {
&a.val.0
}
#[inline(always)]
fn as_array_mut_i8x64(self, a: &mut i8x64<Self>) -> &mut [i8; 64usize] {
&mut a.val.0
}
#[inline(always)]
fn store_array_i8x64(self, a: i8x64<Self>, dest: &mut [i8; 64usize]) {
*dest = a.val.0;
}
#[inline(always)]
fn cvt_from_bytes_i8x64(self, a: u8x64<Self>) -> i8x64<Self> {
unsafe {
i8x64 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_i8x64(self, a: i8x64<Self>) -> u8x64<Self> {
unsafe {
u8x64 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn slide_i8x64<const SHIFT: usize>(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
let mut dest = [Default::default(); 64usize];
dest[..64usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
dest[64usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
dest.simd_into(self)
}
#[inline(always)]
fn slide_within_blocks_i8x64<const SHIFT: usize>(
self,
a: i8x64<Self>,
b: i8x64<Self>,
) -> i8x64<Self> {
let (a0, a1) = self.split_i8x64(a);
let (b0, b1) = self.split_i8x64(b);
self.combine_i8x32(
self.slide_within_blocks_i8x32::<SHIFT>(a0, b0),
self.slide_within_blocks_i8x32::<SHIFT>(a1, b1),
)
}
#[inline(always)]
fn add_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
let (a0, a1) = self.split_i8x64(a);
let (b0, b1) = self.split_i8x64(b);
self.combine_i8x32(self.add_i8x32(a0, b0), self.add_i8x32(a1, b1))
}
#[inline(always)]
fn sub_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
let (a0, a1) = self.split_i8x64(a);
let (b0, b1) = self.split_i8x64(b);
self.combine_i8x32(self.sub_i8x32(a0, b0), self.sub_i8x32(a1, b1))
}
#[inline(always)]
fn mul_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
let (a0, a1) = self.split_i8x64(a);
let (b0, b1) = self.split_i8x64(b);
self.combine_i8x32(self.mul_i8x32(a0, b0), self.mul_i8x32(a1, b1))
}
#[inline(always)]
fn and_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
let (a0, a1) = self.split_i8x64(a);
let (b0, b1) = self.split_i8x64(b);
self.combine_i8x32(self.and_i8x32(a0, b0), self.and_i8x32(a1, b1))
}
#[inline(always)]
fn or_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
let (a0, a1) = self.split_i8x64(a);
let (b0, b1) = self.split_i8x64(b);
self.combine_i8x32(self.or_i8x32(a0, b0), self.or_i8x32(a1, b1))
}
#[inline(always)]
fn xor_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
let (a0, a1) = self.split_i8x64(a);
let (b0, b1) = self.split_i8x64(b);
self.combine_i8x32(self.xor_i8x32(a0, b0), self.xor_i8x32(a1, b1))
}
#[inline(always)]
fn not_i8x64(self, a: i8x64<Self>) -> i8x64<Self> {
let (a0, a1) = self.split_i8x64(a);
self.combine_i8x32(self.not_i8x32(a0), self.not_i8x32(a1))
}
#[inline(always)]
fn shl_i8x64(self, a: i8x64<Self>, shift: u32) -> i8x64<Self> {
let (a0, a1) = self.split_i8x64(a);
self.combine_i8x32(self.shl_i8x32(a0, shift), self.shl_i8x32(a1, shift))
}
#[inline(always)]
fn shlv_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
let (a0, a1) = self.split_i8x64(a);
let (b0, b1) = self.split_i8x64(b);
self.combine_i8x32(self.shlv_i8x32(a0, b0), self.shlv_i8x32(a1, b1))
}
#[inline(always)]
fn shr_i8x64(self, a: i8x64<Self>, shift: u32) -> i8x64<Self> {
let (a0, a1) = self.split_i8x64(a);
self.combine_i8x32(self.shr_i8x32(a0, shift), self.shr_i8x32(a1, shift))
}
#[inline(always)]
fn shrv_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
let (a0, a1) = self.split_i8x64(a);
let (b0, b1) = self.split_i8x64(b);
self.combine_i8x32(self.shrv_i8x32(a0, b0), self.shrv_i8x32(a1, b1))
}
#[inline(always)]
fn simd_eq_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> mask8x64<Self> {
let (a0, a1) = self.split_i8x64(a);
let (b0, b1) = self.split_i8x64(b);
self.combine_mask8x32(self.simd_eq_i8x32(a0, b0), self.simd_eq_i8x32(a1, b1))
}
#[inline(always)]
fn simd_lt_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> mask8x64<Self> {
let (a0, a1) = self.split_i8x64(a);
let (b0, b1) = self.split_i8x64(b);
self.combine_mask8x32(self.simd_lt_i8x32(a0, b0), self.simd_lt_i8x32(a1, b1))
}
#[inline(always)]
fn simd_le_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> mask8x64<Self> {
let (a0, a1) = self.split_i8x64(a);
let (b0, b1) = self.split_i8x64(b);
self.combine_mask8x32(self.simd_le_i8x32(a0, b0), self.simd_le_i8x32(a1, b1))
}
#[inline(always)]
fn simd_ge_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> mask8x64<Self> {
let (a0, a1) = self.split_i8x64(a);
let (b0, b1) = self.split_i8x64(b);
self.combine_mask8x32(self.simd_ge_i8x32(a0, b0), self.simd_ge_i8x32(a1, b1))
}
#[inline(always)]
fn simd_gt_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> mask8x64<Self> {
let (a0, a1) = self.split_i8x64(a);
let (b0, b1) = self.split_i8x64(b);
self.combine_mask8x32(self.simd_gt_i8x32(a0, b0), self.simd_gt_i8x32(a1, b1))
}
#[inline(always)]
fn zip_low_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
let (a0, _) = self.split_i8x64(a);
let (b0, _) = self.split_i8x64(b);
self.combine_i8x32(self.zip_low_i8x32(a0, b0), self.zip_high_i8x32(a0, b0))
}
#[inline(always)]
fn zip_high_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
let (_, a1) = self.split_i8x64(a);
let (_, b1) = self.split_i8x64(b);
self.combine_i8x32(self.zip_low_i8x32(a1, b1), self.zip_high_i8x32(a1, b1))
}
#[inline(always)]
fn unzip_low_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
let (a0, a1) = self.split_i8x64(a);
let (b0, b1) = self.split_i8x64(b);
self.combine_i8x32(self.unzip_low_i8x32(a0, a1), self.unzip_low_i8x32(b0, b1))
}
#[inline(always)]
fn unzip_high_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
let (a0, a1) = self.split_i8x64(a);
let (b0, b1) = self.split_i8x64(b);
self.combine_i8x32(self.unzip_high_i8x32(a0, a1), self.unzip_high_i8x32(b0, b1))
}
#[inline(always)]
fn select_i8x64(self, a: mask8x64<Self>, b: i8x64<Self>, c: i8x64<Self>) -> i8x64<Self> {
let (a0, a1) = self.split_mask8x64(a);
let (b0, b1) = self.split_i8x64(b);
let (c0, c1) = self.split_i8x64(c);
self.combine_i8x32(self.select_i8x32(a0, b0, c0), self.select_i8x32(a1, b1, c1))
}
#[inline(always)]
fn min_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
let (a0, a1) = self.split_i8x64(a);
let (b0, b1) = self.split_i8x64(b);
self.combine_i8x32(self.min_i8x32(a0, b0), self.min_i8x32(a1, b1))
}
#[inline(always)]
fn max_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
let (a0, a1) = self.split_i8x64(a);
let (b0, b1) = self.split_i8x64(b);
self.combine_i8x32(self.max_i8x32(a0, b0), self.max_i8x32(a1, b1))
}
#[inline(always)]
fn split_i8x64(self, a: i8x64<Self>) -> (i8x32<Self>, i8x32<Self>) {
let mut b0 = [0; 32usize];
let mut b1 = [0; 32usize];
b0.copy_from_slice(&a.val.0[0..32usize]);
b1.copy_from_slice(&a.val.0[32usize..64usize]);
(b0.simd_into(self), b1.simd_into(self))
}
#[inline(always)]
fn neg_i8x64(self, a: i8x64<Self>) -> i8x64<Self> {
let (a0, a1) = self.split_i8x64(a);
self.combine_i8x32(self.neg_i8x32(a0), self.neg_i8x32(a1))
}
#[inline(always)]
fn reinterpret_u8_i8x64(self, a: i8x64<Self>) -> u8x64<Self> {
let (a0, a1) = self.split_i8x64(a);
self.combine_u8x32(self.reinterpret_u8_i8x32(a0), self.reinterpret_u8_i8x32(a1))
}
#[inline(always)]
fn reinterpret_u32_i8x64(self, a: i8x64<Self>) -> u32x16<Self> {
let (a0, a1) = self.split_i8x64(a);
self.combine_u32x8(
self.reinterpret_u32_i8x32(a0),
self.reinterpret_u32_i8x32(a1),
)
}
#[inline(always)]
fn splat_u8x64(self, val: u8) -> u8x64<Self> {
let half = self.splat_u8x32(val);
self.combine_u8x32(half, half)
}
#[inline(always)]
fn load_array_u8x64(self, val: [u8; 64usize]) -> u8x64<Self> {
u8x64 {
val: crate::support::Aligned512(val),
simd: self,
}
}
#[inline(always)]
fn load_array_ref_u8x64(self, val: &[u8; 64usize]) -> u8x64<Self> {
u8x64 {
val: crate::support::Aligned512(*val),
simd: self,
}
}
#[inline(always)]
fn as_array_u8x64(self, a: u8x64<Self>) -> [u8; 64usize] {
a.val.0
}
#[inline(always)]
fn as_array_ref_u8x64(self, a: &u8x64<Self>) -> &[u8; 64usize] {
&a.val.0
}
#[inline(always)]
fn as_array_mut_u8x64(self, a: &mut u8x64<Self>) -> &mut [u8; 64usize] {
&mut a.val.0
}
#[inline(always)]
fn store_array_u8x64(self, a: u8x64<Self>, dest: &mut [u8; 64usize]) {
*dest = a.val.0;
}
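// For u8 vectors the byte conversions are identities: the transmutes below
// merely re-wrap the same `[u8; 64]` array.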
#[inline(always)]
fn cvt_from_bytes_u8x64(self, a: u8x64<Self>) -> u8x64<Self> {
unsafe {
u8x64 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_u8x64(self, a: u8x64<Self>) -> u8x64<Self> {
unsafe {
u8x64 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn slide_u8x64<const SHIFT: usize>(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
let mut dest = [Default::default(); 64usize];
dest[..64usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
dest[64usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
dest.simd_into(self)
}
#[inline(always)]
fn slide_within_blocks_u8x64<const SHIFT: usize>(
self,
a: u8x64<Self>,
b: u8x64<Self>,
) -> u8x64<Self> {
let (a0, a1) = self.split_u8x64(a);
let (b0, b1) = self.split_u8x64(b);
self.combine_u8x32(
self.slide_within_blocks_u8x32::<SHIFT>(a0, b0),
self.slide_within_blocks_u8x32::<SHIFT>(a1, b1),
)
}
#[inline(always)]
fn add_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
let (a0, a1) = self.split_u8x64(a);
let (b0, b1) = self.split_u8x64(b);
self.combine_u8x32(self.add_u8x32(a0, b0), self.add_u8x32(a1, b1))
}
#[inline(always)]
fn sub_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
let (a0, a1) = self.split_u8x64(a);
let (b0, b1) = self.split_u8x64(b);
self.combine_u8x32(self.sub_u8x32(a0, b0), self.sub_u8x32(a1, b1))
}
#[inline(always)]
fn mul_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
let (a0, a1) = self.split_u8x64(a);
let (b0, b1) = self.split_u8x64(b);
self.combine_u8x32(self.mul_u8x32(a0, b0), self.mul_u8x32(a1, b1))
}
#[inline(always)]
fn and_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
let (a0, a1) = self.split_u8x64(a);
let (b0, b1) = self.split_u8x64(b);
self.combine_u8x32(self.and_u8x32(a0, b0), self.and_u8x32(a1, b1))
}
#[inline(always)]
fn or_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
let (a0, a1) = self.split_u8x64(a);
let (b0, b1) = self.split_u8x64(b);
self.combine_u8x32(self.or_u8x32(a0, b0), self.or_u8x32(a1, b1))
}
#[inline(always)]
fn xor_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
let (a0, a1) = self.split_u8x64(a);
let (b0, b1) = self.split_u8x64(b);
self.combine_u8x32(self.xor_u8x32(a0, b0), self.xor_u8x32(a1, b1))
}
#[inline(always)]
fn not_u8x64(self, a: u8x64<Self>) -> u8x64<Self> {
let (a0, a1) = self.split_u8x64(a);
self.combine_u8x32(self.not_u8x32(a0), self.not_u8x32(a1))
}
#[inline(always)]
fn shl_u8x64(self, a: u8x64<Self>, shift: u32) -> u8x64<Self> {
let (a0, a1) = self.split_u8x64(a);
self.combine_u8x32(self.shl_u8x32(a0, shift), self.shl_u8x32(a1, shift))
}
#[inline(always)]
fn shlv_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
let (a0, a1) = self.split_u8x64(a);
let (b0, b1) = self.split_u8x64(b);
self.combine_u8x32(self.shlv_u8x32(a0, b0), self.shlv_u8x32(a1, b1))
}
#[inline(always)]
fn shr_u8x64(self, a: u8x64<Self>, shift: u32) -> u8x64<Self> {
let (a0, a1) = self.split_u8x64(a);
self.combine_u8x32(self.shr_u8x32(a0, shift), self.shr_u8x32(a1, shift))
}
#[inline(always)]
fn shrv_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
let (a0, a1) = self.split_u8x64(a);
let (b0, b1) = self.split_u8x64(b);
self.combine_u8x32(self.shrv_u8x32(a0, b0), self.shrv_u8x32(a1, b1))
}
#[inline(always)]
fn simd_eq_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> mask8x64<Self> {
let (a0, a1) = self.split_u8x64(a);
let (b0, b1) = self.split_u8x64(b);
self.combine_mask8x32(self.simd_eq_u8x32(a0, b0), self.simd_eq_u8x32(a1, b1))
}
#[inline(always)]
fn simd_lt_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> mask8x64<Self> {
let (a0, a1) = self.split_u8x64(a);
let (b0, b1) = self.split_u8x64(b);
self.combine_mask8x32(self.simd_lt_u8x32(a0, b0), self.simd_lt_u8x32(a1, b1))
}
#[inline(always)]
fn simd_le_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> mask8x64<Self> {
let (a0, a1) = self.split_u8x64(a);
let (b0, b1) = self.split_u8x64(b);
self.combine_mask8x32(self.simd_le_u8x32(a0, b0), self.simd_le_u8x32(a1, b1))
}
#[inline(always)]
fn simd_ge_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> mask8x64<Self> {
let (a0, a1) = self.split_u8x64(a);
let (b0, b1) = self.split_u8x64(b);
self.combine_mask8x32(self.simd_ge_u8x32(a0, b0), self.simd_ge_u8x32(a1, b1))
}
#[inline(always)]
fn simd_gt_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> mask8x64<Self> {
let (a0, a1) = self.split_u8x64(a);
let (b0, b1) = self.split_u8x64(b);
self.combine_mask8x32(self.simd_gt_u8x32(a0, b0), self.simd_gt_u8x32(a1, b1))
}
#[inline(always)]
fn zip_low_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
let (a0, _) = self.split_u8x64(a);
let (b0, _) = self.split_u8x64(b);
self.combine_u8x32(self.zip_low_u8x32(a0, b0), self.zip_high_u8x32(a0, b0))
}
#[inline(always)]
fn zip_high_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
let (_, a1) = self.split_u8x64(a);
let (_, b1) = self.split_u8x64(b);
self.combine_u8x32(self.zip_low_u8x32(a1, b1), self.zip_high_u8x32(a1, b1))
}
#[inline(always)]
fn unzip_low_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
let (a0, a1) = self.split_u8x64(a);
let (b0, b1) = self.split_u8x64(b);
self.combine_u8x32(self.unzip_low_u8x32(a0, a1), self.unzip_low_u8x32(b0, b1))
}
#[inline(always)]
fn unzip_high_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
let (a0, a1) = self.split_u8x64(a);
let (b0, b1) = self.split_u8x64(b);
self.combine_u8x32(self.unzip_high_u8x32(a0, a1), self.unzip_high_u8x32(b0, b1))
}
#[inline(always)]
fn select_u8x64(self, a: mask8x64<Self>, b: u8x64<Self>, c: u8x64<Self>) -> u8x64<Self> {
let (a0, a1) = self.split_mask8x64(a);
let (b0, b1) = self.split_u8x64(b);
let (c0, c1) = self.split_u8x64(c);
self.combine_u8x32(self.select_u8x32(a0, b0, c0), self.select_u8x32(a1, b1, c1))
}
#[inline(always)]
fn min_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
let (a0, a1) = self.split_u8x64(a);
let (b0, b1) = self.split_u8x64(b);
self.combine_u8x32(self.min_u8x32(a0, b0), self.min_u8x32(a1, b1))
}
#[inline(always)]
fn max_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
let (a0, a1) = self.split_u8x64(a);
let (b0, b1) = self.split_u8x64(b);
self.combine_u8x32(self.max_u8x32(a0, b0), self.max_u8x32(a1, b1))
}
#[inline(always)]
fn split_u8x64(self, a: u8x64<Self>) -> (u8x32<Self>, u8x32<Self>) {
let mut b0 = [0; 32usize];
let mut b1 = [0; 32usize];
b0.copy_from_slice(&a.val.0[0..32usize]);
b1.copy_from_slice(&a.val.0[32usize..64usize]);
(b0.simd_into(self), b1.simd_into(self))
}
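// De-interleaves four-channel byte data (e.g. RGBA pixels): lanes 0..16 of
// the result receive channel 0 of each group of four source bytes, lanes
// 16..32 channel 1, and so on. `store_interleaved_128_u8x64` applies the
// inverse permutation with stride 16, since 4 * 16 == 64 == 1 (mod 63).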
#[inline(always)]
fn load_interleaved_128_u8x64(self, src: &[u8; 64usize]) -> u8x64<Self> {
[
src[0usize],
src[4usize],
src[8usize],
src[12usize],
src[16usize],
src[20usize],
src[24usize],
src[28usize],
src[32usize],
src[36usize],
src[40usize],
src[44usize],
src[48usize],
src[52usize],
src[56usize],
src[60usize],
src[1usize],
src[5usize],
src[9usize],
src[13usize],
src[17usize],
src[21usize],
src[25usize],
src[29usize],
src[33usize],
src[37usize],
src[41usize],
src[45usize],
src[49usize],
src[53usize],
src[57usize],
src[61usize],
src[2usize],
src[6usize],
src[10usize],
src[14usize],
src[18usize],
src[22usize],
src[26usize],
src[30usize],
src[34usize],
src[38usize],
src[42usize],
src[46usize],
src[50usize],
src[54usize],
src[58usize],
src[62usize],
src[3usize],
src[7usize],
src[11usize],
src[15usize],
src[19usize],
src[23usize],
src[27usize],
src[31usize],
src[35usize],
src[39usize],
src[43usize],
src[47usize],
src[51usize],
src[55usize],
src[59usize],
src[63usize],
]
.simd_into(self)
}
#[inline(always)]
fn store_interleaved_128_u8x64(self, a: u8x64<Self>, dest: &mut [u8; 64usize]) {
*dest = [
a[0usize], a[16usize], a[32usize], a[48usize], a[1usize], a[17usize], a[33usize],
a[49usize], a[2usize], a[18usize], a[34usize], a[50usize], a[3usize], a[19usize],
a[35usize], a[51usize], a[4usize], a[20usize], a[36usize], a[52usize], a[5usize],
a[21usize], a[37usize], a[53usize], a[6usize], a[22usize], a[38usize], a[54usize],
a[7usize], a[23usize], a[39usize], a[55usize], a[8usize], a[24usize], a[40usize],
a[56usize], a[9usize], a[25usize], a[41usize], a[57usize], a[10usize], a[26usize],
a[42usize], a[58usize], a[11usize], a[27usize], a[43usize], a[59usize], a[12usize],
a[28usize], a[44usize], a[60usize], a[13usize], a[29usize], a[45usize], a[61usize],
a[14usize], a[30usize], a[46usize], a[62usize], a[15usize], a[31usize], a[47usize],
a[63usize],
];
}
#[inline(always)]
fn reinterpret_u32_u8x64(self, a: u8x64<Self>) -> u32x16<Self> {
let (a0, a1) = self.split_u8x64(a);
self.combine_u32x8(
self.reinterpret_u32_u8x32(a0),
self.reinterpret_u32_u8x32(a1),
)
}
#[inline(always)]
fn splat_mask8x64(self, val: i8) -> mask8x64<Self> {
let half = self.splat_mask8x32(val);
self.combine_mask8x32(half, half)
}
#[inline(always)]
fn load_array_mask8x64(self, val: [i8; 64usize]) -> mask8x64<Self> {
mask8x64 {
val: crate::support::Aligned512(val),
simd: self,
}
}
#[inline(always)]
fn load_array_ref_mask8x64(self, val: &[i8; 64usize]) -> mask8x64<Self> {
mask8x64 {
val: crate::support::Aligned512(*val),
simd: self,
}
}
#[inline(always)]
fn as_array_mask8x64(self, a: mask8x64<Self>) -> [i8; 64usize] {
a.val.0
}
#[inline(always)]
fn as_array_ref_mask8x64(self, a: &mask8x64<Self>) -> &[i8; 64usize] {
&a.val.0
}
#[inline(always)]
fn as_array_mut_mask8x64(self, a: &mut mask8x64<Self>) -> &mut [i8; 64usize] {
&mut a.val.0
}
#[inline(always)]
fn store_array_mask8x64(self, a: mask8x64<Self>, dest: &mut [i8; 64usize]) {
*dest = a.val.0;
}
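// Soundness note for the byte-casting transmutes below: both sides are
// `Aligned512` wrappers over 64 bytes of plain old data, so sizes and
// alignments match and every bit pattern is valid.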
#[inline(always)]
fn cvt_from_bytes_mask8x64(self, a: u8x64<Self>) -> mask8x64<Self> {
unsafe {
mask8x64 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_mask8x64(self, a: mask8x64<Self>) -> u8x64<Self> {
unsafe {
u8x64 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
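// `slide` concatenates the upper `64 - SHIFT` lanes of `a` with the first
// `SHIFT` lanes of `b`; a `SHIFT` beyond the lane count panics in the slice
// copies, so callers are expected to keep it in range.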
#[inline(always)]
fn slide_mask8x64<const SHIFT: usize>(
self,
a: mask8x64<Self>,
b: mask8x64<Self>,
) -> mask8x64<Self> {
let mut dest = [Default::default(); 64usize];
dest[..64usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
dest[64usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
dest.simd_into(self)
}
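// The `within_blocks` variant slides each 256-bit half independently, so no
// lane crosses the half boundary (and, via the recursive delegation, no lane
// crosses the narrower block boundaries either).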
#[inline(always)]
fn slide_within_blocks_mask8x64<const SHIFT: usize>(
self,
a: mask8x64<Self>,
b: mask8x64<Self>,
) -> mask8x64<Self> {
let (a0, a1) = self.split_mask8x64(a);
let (b0, b1) = self.split_mask8x64(b);
self.combine_mask8x32(
self.slide_within_blocks_mask8x32::<SHIFT>(a0, b0),
self.slide_within_blocks_mask8x32::<SHIFT>(a1, b1),
)
}
#[inline(always)]
fn and_mask8x64(self, a: mask8x64<Self>, b: mask8x64<Self>) -> mask8x64<Self> {
let (a0, a1) = self.split_mask8x64(a);
let (b0, b1) = self.split_mask8x64(b);
self.combine_mask8x32(self.and_mask8x32(a0, b0), self.and_mask8x32(a1, b1))
}
#[inline(always)]
fn or_mask8x64(self, a: mask8x64<Self>, b: mask8x64<Self>) -> mask8x64<Self> {
let (a0, a1) = self.split_mask8x64(a);
let (b0, b1) = self.split_mask8x64(b);
self.combine_mask8x32(self.or_mask8x32(a0, b0), self.or_mask8x32(a1, b1))
}
#[inline(always)]
fn xor_mask8x64(self, a: mask8x64<Self>, b: mask8x64<Self>) -> mask8x64<Self> {
let (a0, a1) = self.split_mask8x64(a);
let (b0, b1) = self.split_mask8x64(b);
self.combine_mask8x32(self.xor_mask8x32(a0, b0), self.xor_mask8x32(a1, b1))
}
#[inline(always)]
fn not_mask8x64(self, a: mask8x64<Self>) -> mask8x64<Self> {
let (a0, a1) = self.split_mask8x64(a);
self.combine_mask8x32(self.not_mask8x32(a0), self.not_mask8x32(a1))
}
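// Lane-wise select: where the mask lane in `a` is set, take the lane from
// `b`, otherwise from `c`.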
#[inline(always)]
fn select_mask8x64(
self,
a: mask8x64<Self>,
b: mask8x64<Self>,
c: mask8x64<Self>,
) -> mask8x64<Self> {
let (a0, a1) = self.split_mask8x64(a);
let (b0, b1) = self.split_mask8x64(b);
let (c0, c1) = self.split_mask8x64(c);
self.combine_mask8x32(
self.select_mask8x32(a0, b0, c0),
self.select_mask8x32(a1, b1, c1),
)
}
#[inline(always)]
fn simd_eq_mask8x64(self, a: mask8x64<Self>, b: mask8x64<Self>) -> mask8x64<Self> {
let (a0, a1) = self.split_mask8x64(a);
let (b0, b1) = self.split_mask8x64(b);
self.combine_mask8x32(self.simd_eq_mask8x32(a0, b0), self.simd_eq_mask8x32(a1, b1))
}
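// The boolean reductions split once and short-circuit across the halves:
// `||` for the `any` family, `&&` for the `all` family.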
#[inline(always)]
fn any_true_mask8x64(self, a: mask8x64<Self>) -> bool {
let (a0, a1) = self.split_mask8x64(a);
self.any_true_mask8x32(a0) || self.any_true_mask8x32(a1)
}
#[inline(always)]
fn all_true_mask8x64(self, a: mask8x64<Self>) -> bool {
let (a0, a1) = self.split_mask8x64(a);
self.all_true_mask8x32(a0) && self.all_true_mask8x32(a1)
}
#[inline(always)]
fn any_false_mask8x64(self, a: mask8x64<Self>) -> bool {
let (a0, a1) = self.split_mask8x64(a);
self.any_false_mask8x32(a0) || self.any_false_mask8x32(a1)
}
#[inline(always)]
fn all_false_mask8x64(self, a: mask8x64<Self>) -> bool {
let (a0, a1) = self.split_mask8x64(a);
self.all_false_mask8x32(a0) && self.all_false_mask8x32(a1)
}
#[inline(always)]
fn split_mask8x64(self, a: mask8x64<Self>) -> (mask8x32<Self>, mask8x32<Self>) {
let mut b0 = [0; 32usize];
let mut b1 = [0; 32usize];
b0.copy_from_slice(&a.val.0[0..32usize]);
b1.copy_from_slice(&a.val.0[32usize..64usize]);
(b0.simd_into(self), b1.simd_into(self))
}
#[inline(always)]
fn splat_i16x32(self, val: i16) -> i16x32<Self> {
let half = self.splat_i16x16(val);
self.combine_i16x16(half, half)
}
#[inline(always)]
fn load_array_i16x32(self, val: [i16; 32usize]) -> i16x32<Self> {
i16x32 {
val: crate::support::Aligned512(val),
simd: self,
}
}
#[inline(always)]
fn load_array_ref_i16x32(self, val: &[i16; 32usize]) -> i16x32<Self> {
i16x32 {
val: crate::support::Aligned512(*val),
simd: self,
}
}
#[inline(always)]
fn as_array_i16x32(self, a: i16x32<Self>) -> [i16; 32usize] {
a.val.0
}
#[inline(always)]
fn as_array_ref_i16x32(self, a: &i16x32<Self>) -> &[i16; 32usize] {
&a.val.0
}
#[inline(always)]
fn as_array_mut_i16x32(self, a: &mut i16x32<Self>) -> &mut [i16; 32usize] {
&mut a.val.0
}
#[inline(always)]
fn store_array_i16x32(self, a: i16x32<Self>, dest: &mut [i16; 32usize]) -> () {
*dest = a.val.0;
}
#[inline(always)]
fn cvt_from_bytes_i16x32(self, a: u8x64<Self>) -> i16x32<Self> {
unsafe {
i16x32 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_i16x32(self, a: i16x32<Self>) -> u8x64<Self> {
unsafe {
u8x64 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn slide_i16x32<const SHIFT: usize>(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
let mut dest = [Default::default(); 32usize];
dest[..32usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
dest[32usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
dest.simd_into(self)
}
#[inline(always)]
fn slide_within_blocks_i16x32<const SHIFT: usize>(
self,
a: i16x32<Self>,
b: i16x32<Self>,
) -> i16x32<Self> {
let (a0, a1) = self.split_i16x32(a);
let (b0, b1) = self.split_i16x32(b);
self.combine_i16x16(
self.slide_within_blocks_i16x16::<SHIFT>(a0, b0),
self.slide_within_blocks_i16x16::<SHIFT>(a1, b1),
)
}
#[inline(always)]
fn add_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
let (a0, a1) = self.split_i16x32(a);
let (b0, b1) = self.split_i16x32(b);
self.combine_i16x16(self.add_i16x16(a0, b0), self.add_i16x16(a1, b1))
}
#[inline(always)]
fn sub_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
let (a0, a1) = self.split_i16x32(a);
let (b0, b1) = self.split_i16x32(b);
self.combine_i16x16(self.sub_i16x16(a0, b0), self.sub_i16x16(a1, b1))
}
#[inline(always)]
fn mul_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
let (a0, a1) = self.split_i16x32(a);
let (b0, b1) = self.split_i16x32(b);
self.combine_i16x16(self.mul_i16x16(a0, b0), self.mul_i16x16(a1, b1))
}
#[inline(always)]
fn and_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
let (a0, a1) = self.split_i16x32(a);
let (b0, b1) = self.split_i16x32(b);
self.combine_i16x16(self.and_i16x16(a0, b0), self.and_i16x16(a1, b1))
}
#[inline(always)]
fn or_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
let (a0, a1) = self.split_i16x32(a);
let (b0, b1) = self.split_i16x32(b);
self.combine_i16x16(self.or_i16x16(a0, b0), self.or_i16x16(a1, b1))
}
#[inline(always)]
fn xor_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
let (a0, a1) = self.split_i16x32(a);
let (b0, b1) = self.split_i16x32(b);
self.combine_i16x16(self.xor_i16x16(a0, b0), self.xor_i16x16(a1, b1))
}
#[inline(always)]
fn not_i16x32(self, a: i16x32<Self>) -> i16x32<Self> {
let (a0, a1) = self.split_i16x32(a);
self.combine_i16x16(self.not_i16x16(a0), self.not_i16x16(a1))
}
#[inline(always)]
fn shl_i16x32(self, a: i16x32<Self>, shift: u32) -> i16x32<Self> {
let (a0, a1) = self.split_i16x32(a);
self.combine_i16x16(self.shl_i16x16(a0, shift), self.shl_i16x16(a1, shift))
}
#[inline(always)]
fn shlv_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
let (a0, a1) = self.split_i16x32(a);
let (b0, b1) = self.split_i16x32(b);
self.combine_i16x16(self.shlv_i16x16(a0, b0), self.shlv_i16x16(a1, b1))
}
#[inline(always)]
fn shr_i16x32(self, a: i16x32<Self>, shift: u32) -> i16x32<Self> {
let (a0, a1) = self.split_i16x32(a);
self.combine_i16x16(self.shr_i16x16(a0, shift), self.shr_i16x16(a1, shift))
}
#[inline(always)]
fn shrv_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
let (a0, a1) = self.split_i16x32(a);
let (b0, b1) = self.split_i16x32(b);
self.combine_i16x16(self.shrv_i16x16(a0, b0), self.shrv_i16x16(a1, b1))
}
#[inline(always)]
fn simd_eq_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> mask16x32<Self> {
let (a0, a1) = self.split_i16x32(a);
let (b0, b1) = self.split_i16x32(b);
self.combine_mask16x16(self.simd_eq_i16x16(a0, b0), self.simd_eq_i16x16(a1, b1))
}
#[inline(always)]
fn simd_lt_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> mask16x32<Self> {
let (a0, a1) = self.split_i16x32(a);
let (b0, b1) = self.split_i16x32(b);
self.combine_mask16x16(self.simd_lt_i16x16(a0, b0), self.simd_lt_i16x16(a1, b1))
}
#[inline(always)]
fn simd_le_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> mask16x32<Self> {
let (a0, a1) = self.split_i16x32(a);
let (b0, b1) = self.split_i16x32(b);
self.combine_mask16x16(self.simd_le_i16x16(a0, b0), self.simd_le_i16x16(a1, b1))
}
#[inline(always)]
fn simd_ge_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> mask16x32<Self> {
let (a0, a1) = self.split_i16x32(a);
let (b0, b1) = self.split_i16x32(b);
self.combine_mask16x16(self.simd_ge_i16x16(a0, b0), self.simd_ge_i16x16(a1, b1))
}
#[inline(always)]
fn simd_gt_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> mask16x32<Self> {
let (a0, a1) = self.split_i16x32(a);
let (b0, b1) = self.split_i16x32(b);
self.combine_mask16x16(self.simd_gt_i16x16(a0, b0), self.simd_gt_i16x16(a1, b1))
}
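// `zip_low` interleaves the low halves of `a` and `b` (a0, b0, a1, b1, ...),
// which is precisely the full zip of the two low half-vectors; `zip_high`
// does the same with the high halves.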
#[inline(always)]
fn zip_low_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
let (a0, _) = self.split_i16x32(a);
let (b0, _) = self.split_i16x32(b);
self.combine_i16x16(self.zip_low_i16x16(a0, b0), self.zip_high_i16x16(a0, b0))
}
#[inline(always)]
fn zip_high_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
let (_, a1) = self.split_i16x32(a);
let (_, b1) = self.split_i16x32(b);
self.combine_i16x16(self.zip_low_i16x16(a1, b1), self.zip_high_i16x16(a1, b1))
}
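// `unzip_low` keeps the even-indexed lanes of `a` followed by the
// even-indexed lanes of `b`; `unzip_high` keeps the odd-indexed lanes.
// Together they undo a `zip_low`/`zip_high` pair.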
#[inline(always)]
fn unzip_low_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
let (a0, a1) = self.split_i16x32(a);
let (b0, b1) = self.split_i16x32(b);
self.combine_i16x16(self.unzip_low_i16x16(a0, a1), self.unzip_low_i16x16(b0, b1))
}
#[inline(always)]
fn unzip_high_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
let (a0, a1) = self.split_i16x32(a);
let (b0, b1) = self.split_i16x32(b);
self.combine_i16x16(
self.unzip_high_i16x16(a0, a1),
self.unzip_high_i16x16(b0, b1),
)
}
#[inline(always)]
fn select_i16x32(self, a: mask16x32<Self>, b: i16x32<Self>, c: i16x32<Self>) -> i16x32<Self> {
let (a0, a1) = self.split_mask16x32(a);
let (b0, b1) = self.split_i16x32(b);
let (c0, c1) = self.split_i16x32(c);
self.combine_i16x16(
self.select_i16x16(a0, b0, c0),
self.select_i16x16(a1, b1, c1),
)
}
#[inline(always)]
fn min_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
let (a0, a1) = self.split_i16x32(a);
let (b0, b1) = self.split_i16x32(b);
self.combine_i16x16(self.min_i16x16(a0, b0), self.min_i16x16(a1, b1))
}
#[inline(always)]
fn max_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
let (a0, a1) = self.split_i16x32(a);
let (b0, b1) = self.split_i16x32(b);
self.combine_i16x16(self.max_i16x16(a0, b0), self.max_i16x16(a1, b1))
}
#[inline(always)]
fn split_i16x32(self, a: i16x32<Self>) -> (i16x16<Self>, i16x16<Self>) {
let mut b0 = [0; 16usize];
let mut b1 = [0; 16usize];
b0.copy_from_slice(&a.val.0[0..16usize]);
b1.copy_from_slice(&a.val.0[16usize..32usize]);
(b0.simd_into(self), b1.simd_into(self))
}
#[inline(always)]
fn neg_i16x32(self, a: i16x32<Self>) -> i16x32<Self> {
let (a0, a1) = self.split_i16x32(a);
self.combine_i16x16(self.neg_i16x16(a0), self.neg_i16x16(a1))
}
#[inline(always)]
fn reinterpret_u8_i16x32(self, a: i16x32<Self>) -> u8x64<Self> {
let (a0, a1) = self.split_i16x32(a);
self.combine_u8x32(
self.reinterpret_u8_i16x16(a0),
self.reinterpret_u8_i16x16(a1),
)
}
#[inline(always)]
fn reinterpret_u32_i16x32(self, a: i16x32<Self>) -> u32x16<Self> {
let (a0, a1) = self.split_i16x32(a);
self.combine_u32x8(
self.reinterpret_u32_i16x16(a0),
self.reinterpret_u32_i16x16(a1),
)
}
#[inline(always)]
fn splat_u16x32(self, val: u16) -> u16x32<Self> {
let half = self.splat_u16x16(val);
self.combine_u16x16(half, half)
}
#[inline(always)]
fn load_array_u16x32(self, val: [u16; 32usize]) -> u16x32<Self> {
u16x32 {
val: crate::support::Aligned512(val),
simd: self,
}
}
#[inline(always)]
fn load_array_ref_u16x32(self, val: &[u16; 32usize]) -> u16x32<Self> {
u16x32 {
val: crate::support::Aligned512(*val),
simd: self,
}
}
#[inline(always)]
fn as_array_u16x32(self, a: u16x32<Self>) -> [u16; 32usize] {
a.val.0
}
#[inline(always)]
fn as_array_ref_u16x32(self, a: &u16x32<Self>) -> &[u16; 32usize] {
&a.val.0
}
#[inline(always)]
fn as_array_mut_u16x32(self, a: &mut u16x32<Self>) -> &mut [u16; 32usize] {
&mut a.val.0
}
#[inline(always)]
fn store_array_u16x32(self, a: u16x32<Self>, dest: &mut [u16; 32usize]) -> () {
*dest = a.val.0;
}
#[inline(always)]
fn cvt_from_bytes_u16x32(self, a: u8x64<Self>) -> u16x32<Self> {
unsafe {
u16x32 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_u16x32(self, a: u16x32<Self>) -> u8x64<Self> {
unsafe {
u8x64 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn slide_u16x32<const SHIFT: usize>(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
let mut dest = [Default::default(); 32usize];
dest[..32usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
dest[32usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
dest.simd_into(self)
}
#[inline(always)]
fn slide_within_blocks_u16x32<const SHIFT: usize>(
self,
a: u16x32<Self>,
b: u16x32<Self>,
) -> u16x32<Self> {
let (a0, a1) = self.split_u16x32(a);
let (b0, b1) = self.split_u16x32(b);
self.combine_u16x16(
self.slide_within_blocks_u16x16::<SHIFT>(a0, b0),
self.slide_within_blocks_u16x16::<SHIFT>(a1, b1),
)
}
#[inline(always)]
fn add_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
let (a0, a1) = self.split_u16x32(a);
let (b0, b1) = self.split_u16x32(b);
self.combine_u16x16(self.add_u16x16(a0, b0), self.add_u16x16(a1, b1))
}
#[inline(always)]
fn sub_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
let (a0, a1) = self.split_u16x32(a);
let (b0, b1) = self.split_u16x32(b);
self.combine_u16x16(self.sub_u16x16(a0, b0), self.sub_u16x16(a1, b1))
}
#[inline(always)]
fn mul_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
let (a0, a1) = self.split_u16x32(a);
let (b0, b1) = self.split_u16x32(b);
self.combine_u16x16(self.mul_u16x16(a0, b0), self.mul_u16x16(a1, b1))
}
#[inline(always)]
fn and_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
let (a0, a1) = self.split_u16x32(a);
let (b0, b1) = self.split_u16x32(b);
self.combine_u16x16(self.and_u16x16(a0, b0), self.and_u16x16(a1, b1))
}
#[inline(always)]
fn or_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
let (a0, a1) = self.split_u16x32(a);
let (b0, b1) = self.split_u16x32(b);
self.combine_u16x16(self.or_u16x16(a0, b0), self.or_u16x16(a1, b1))
}
#[inline(always)]
fn xor_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
let (a0, a1) = self.split_u16x32(a);
let (b0, b1) = self.split_u16x32(b);
self.combine_u16x16(self.xor_u16x16(a0, b0), self.xor_u16x16(a1, b1))
}
#[inline(always)]
fn not_u16x32(self, a: u16x32<Self>) -> u16x32<Self> {
let (a0, a1) = self.split_u16x32(a);
self.combine_u16x16(self.not_u16x16(a0), self.not_u16x16(a1))
}
#[inline(always)]
fn shl_u16x32(self, a: u16x32<Self>, shift: u32) -> u16x32<Self> {
let (a0, a1) = self.split_u16x32(a);
self.combine_u16x16(self.shl_u16x16(a0, shift), self.shl_u16x16(a1, shift))
}
#[inline(always)]
fn shlv_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
let (a0, a1) = self.split_u16x32(a);
let (b0, b1) = self.split_u16x32(b);
self.combine_u16x16(self.shlv_u16x16(a0, b0), self.shlv_u16x16(a1, b1))
}
#[inline(always)]
fn shr_u16x32(self, a: u16x32<Self>, shift: u32) -> u16x32<Self> {
let (a0, a1) = self.split_u16x32(a);
self.combine_u16x16(self.shr_u16x16(a0, shift), self.shr_u16x16(a1, shift))
}
#[inline(always)]
fn shrv_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
let (a0, a1) = self.split_u16x32(a);
let (b0, b1) = self.split_u16x32(b);
self.combine_u16x16(self.shrv_u16x16(a0, b0), self.shrv_u16x16(a1, b1))
}
#[inline(always)]
fn simd_eq_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> mask16x32<Self> {
let (a0, a1) = self.split_u16x32(a);
let (b0, b1) = self.split_u16x32(b);
self.combine_mask16x16(self.simd_eq_u16x16(a0, b0), self.simd_eq_u16x16(a1, b1))
}
#[inline(always)]
fn simd_lt_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> mask16x32<Self> {
let (a0, a1) = self.split_u16x32(a);
let (b0, b1) = self.split_u16x32(b);
self.combine_mask16x16(self.simd_lt_u16x16(a0, b0), self.simd_lt_u16x16(a1, b1))
}
#[inline(always)]
fn simd_le_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> mask16x32<Self> {
let (a0, a1) = self.split_u16x32(a);
let (b0, b1) = self.split_u16x32(b);
self.combine_mask16x16(self.simd_le_u16x16(a0, b0), self.simd_le_u16x16(a1, b1))
}
#[inline(always)]
fn simd_ge_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> mask16x32<Self> {
let (a0, a1) = self.split_u16x32(a);
let (b0, b1) = self.split_u16x32(b);
self.combine_mask16x16(self.simd_ge_u16x16(a0, b0), self.simd_ge_u16x16(a1, b1))
}
#[inline(always)]
fn simd_gt_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> mask16x32<Self> {
let (a0, a1) = self.split_u16x32(a);
let (b0, b1) = self.split_u16x32(b);
self.combine_mask16x16(self.simd_gt_u16x16(a0, b0), self.simd_gt_u16x16(a1, b1))
}
#[inline(always)]
fn zip_low_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
let (a0, _) = self.split_u16x32(a);
let (b0, _) = self.split_u16x32(b);
self.combine_u16x16(self.zip_low_u16x16(a0, b0), self.zip_high_u16x16(a0, b0))
}
#[inline(always)]
fn zip_high_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
let (_, a1) = self.split_u16x32(a);
let (_, b1) = self.split_u16x32(b);
self.combine_u16x16(self.zip_low_u16x16(a1, b1), self.zip_high_u16x16(a1, b1))
}
#[inline(always)]
fn unzip_low_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
let (a0, a1) = self.split_u16x32(a);
let (b0, b1) = self.split_u16x32(b);
self.combine_u16x16(self.unzip_low_u16x16(a0, a1), self.unzip_low_u16x16(b0, b1))
}
#[inline(always)]
fn unzip_high_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
let (a0, a1) = self.split_u16x32(a);
let (b0, b1) = self.split_u16x32(b);
self.combine_u16x16(
self.unzip_high_u16x16(a0, a1),
self.unzip_high_u16x16(b0, b1),
)
}
#[inline(always)]
fn select_u16x32(self, a: mask16x32<Self>, b: u16x32<Self>, c: u16x32<Self>) -> u16x32<Self> {
let (a0, a1) = self.split_mask16x32(a);
let (b0, b1) = self.split_u16x32(b);
let (c0, c1) = self.split_u16x32(c);
self.combine_u16x16(
self.select_u16x16(a0, b0, c0),
self.select_u16x16(a1, b1, c1),
)
}
#[inline(always)]
fn min_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
let (a0, a1) = self.split_u16x32(a);
let (b0, b1) = self.split_u16x32(b);
self.combine_u16x16(self.min_u16x16(a0, b0), self.min_u16x16(a1, b1))
}
#[inline(always)]
fn max_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
let (a0, a1) = self.split_u16x32(a);
let (b0, b1) = self.split_u16x32(b);
self.combine_u16x16(self.max_u16x16(a0, b0), self.max_u16x16(a1, b1))
}
#[inline(always)]
fn split_u16x32(self, a: u16x32<Self>) -> (u16x16<Self>, u16x16<Self>) {
let mut b0 = [0; 16usize];
let mut b1 = [0; 16usize];
b0.copy_from_slice(&a.val.0[0..16usize]);
b1.copy_from_slice(&a.val.0[16usize..32usize]);
(b0.simd_into(self), b1.simd_into(self))
}
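// Same stride-4 interleaved-load pattern as the u8 version, at u16
// granularity: lane `8 * s + k` of the result is `src[4 * k + s]`.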
#[inline(always)]
fn load_interleaved_128_u16x32(self, src: &[u16; 32usize]) -> u16x32<Self> {
[
src[0usize],
src[4usize],
src[8usize],
src[12usize],
src[16usize],
src[20usize],
src[24usize],
src[28usize],
src[1usize],
src[5usize],
src[9usize],
src[13usize],
src[17usize],
src[21usize],
src[25usize],
src[29usize],
src[2usize],
src[6usize],
src[10usize],
src[14usize],
src[18usize],
src[22usize],
src[26usize],
src[30usize],
src[3usize],
src[7usize],
src[11usize],
src[15usize],
src[19usize],
src[23usize],
src[27usize],
src[31usize],
]
.simd_into(self)
}
#[inline(always)]
fn store_interleaved_128_u16x32(self, a: u16x32<Self>, dest: &mut [u16; 32usize]) -> () {
*dest = [
a[0usize], a[8usize], a[16usize], a[24usize], a[1usize], a[9usize], a[17usize],
a[25usize], a[2usize], a[10usize], a[18usize], a[26usize], a[3usize], a[11usize],
a[19usize], a[27usize], a[4usize], a[12usize], a[20usize], a[28usize], a[5usize],
a[13usize], a[21usize], a[29usize], a[6usize], a[14usize], a[22usize], a[30usize],
a[7usize], a[15usize], a[23usize], a[31usize],
];
}
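// Narrows each 16-bit lane to 8 bits by delegating per half; the per-lane
// semantics (truncating vs. saturating) are whatever `narrow_u16x16`
// implements at the 256-bit width.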
#[inline(always)]
fn narrow_u16x32(self, a: u16x32<Self>) -> u8x32<Self> {
let (a0, a1) = self.split_u16x32(a);
self.combine_u8x16(self.narrow_u16x16(a0), self.narrow_u16x16(a1))
}
#[inline(always)]
fn reinterpret_u8_u16x32(self, a: u16x32<Self>) -> u8x64<Self> {
let (a0, a1) = self.split_u16x32(a);
self.combine_u8x32(
self.reinterpret_u8_u16x16(a0),
self.reinterpret_u8_u16x16(a1),
)
}
#[inline(always)]
fn reinterpret_u32_u16x32(self, a: u16x32<Self>) -> u32x16<Self> {
let (a0, a1) = self.split_u16x32(a);
self.combine_u32x8(
self.reinterpret_u32_u16x16(a0),
self.reinterpret_u32_u16x16(a1),
)
}
#[inline(always)]
fn splat_mask16x32(self, val: i16) -> mask16x32<Self> {
let half = self.splat_mask16x16(val);
self.combine_mask16x16(half, half)
}
#[inline(always)]
fn load_array_mask16x32(self, val: [i16; 32usize]) -> mask16x32<Self> {
mask16x32 {
val: crate::support::Aligned512(val),
simd: self,
}
}
#[inline(always)]
fn load_array_ref_mask16x32(self, val: &[i16; 32usize]) -> mask16x32<Self> {
mask16x32 {
val: crate::support::Aligned512(*val),
simd: self,
}
}
#[inline(always)]
fn as_array_mask16x32(self, a: mask16x32<Self>) -> [i16; 32usize] {
a.val.0
}
#[inline(always)]
fn as_array_ref_mask16x32(self, a: &mask16x32<Self>) -> &[i16; 32usize] {
&a.val.0
}
#[inline(always)]
fn as_array_mut_mask16x32(self, a: &mut mask16x32<Self>) -> &mut [i16; 32usize] {
&mut a.val.0
}
#[inline(always)]
fn store_array_mask16x32(self, a: mask16x32<Self>, dest: &mut [i16; 32usize]) -> () {
*dest = a.val.0;
}
#[inline(always)]
fn cvt_from_bytes_mask16x32(self, a: u8x64<Self>) -> mask16x32<Self> {
unsafe {
mask16x32 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_mask16x32(self, a: mask16x32<Self>) -> u8x64<Self> {
unsafe {
u8x64 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn slide_mask16x32<const SHIFT: usize>(
self,
a: mask16x32<Self>,
b: mask16x32<Self>,
) -> mask16x32<Self> {
let mut dest = [Default::default(); 32usize];
dest[..32usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
dest[32usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
dest.simd_into(self)
}
#[inline(always)]
fn slide_within_blocks_mask16x32<const SHIFT: usize>(
self,
a: mask16x32<Self>,
b: mask16x32<Self>,
) -> mask16x32<Self> {
let (a0, a1) = self.split_mask16x32(a);
let (b0, b1) = self.split_mask16x32(b);
self.combine_mask16x16(
self.slide_within_blocks_mask16x16::<SHIFT>(a0, b0),
self.slide_within_blocks_mask16x16::<SHIFT>(a1, b1),
)
}
#[inline(always)]
fn and_mask16x32(self, a: mask16x32<Self>, b: mask16x32<Self>) -> mask16x32<Self> {
let (a0, a1) = self.split_mask16x32(a);
let (b0, b1) = self.split_mask16x32(b);
self.combine_mask16x16(self.and_mask16x16(a0, b0), self.and_mask16x16(a1, b1))
}
#[inline(always)]
fn or_mask16x32(self, a: mask16x32<Self>, b: mask16x32<Self>) -> mask16x32<Self> {
let (a0, a1) = self.split_mask16x32(a);
let (b0, b1) = self.split_mask16x32(b);
self.combine_mask16x16(self.or_mask16x16(a0, b0), self.or_mask16x16(a1, b1))
}
#[inline(always)]
fn xor_mask16x32(self, a: mask16x32<Self>, b: mask16x32<Self>) -> mask16x32<Self> {
let (a0, a1) = self.split_mask16x32(a);
let (b0, b1) = self.split_mask16x32(b);
self.combine_mask16x16(self.xor_mask16x16(a0, b0), self.xor_mask16x16(a1, b1))
}
#[inline(always)]
fn not_mask16x32(self, a: mask16x32<Self>) -> mask16x32<Self> {
let (a0, a1) = self.split_mask16x32(a);
self.combine_mask16x16(self.not_mask16x16(a0), self.not_mask16x16(a1))
}
#[inline(always)]
fn select_mask16x32(
self,
a: mask16x32<Self>,
b: mask16x32<Self>,
c: mask16x32<Self>,
) -> mask16x32<Self> {
let (a0, a1) = self.split_mask16x32(a);
let (b0, b1) = self.split_mask16x32(b);
let (c0, c1) = self.split_mask16x32(c);
self.combine_mask16x16(
self.select_mask16x16(a0, b0, c0),
self.select_mask16x16(a1, b1, c1),
)
}
#[inline(always)]
fn simd_eq_mask16x32(self, a: mask16x32<Self>, b: mask16x32<Self>) -> mask16x32<Self> {
let (a0, a1) = self.split_mask16x32(a);
let (b0, b1) = self.split_mask16x32(b);
self.combine_mask16x16(
self.simd_eq_mask16x16(a0, b0),
self.simd_eq_mask16x16(a1, b1),
)
}
#[inline(always)]
fn any_true_mask16x32(self, a: mask16x32<Self>) -> bool {
let (a0, a1) = self.split_mask16x32(a);
self.any_true_mask16x16(a0) || self.any_true_mask16x16(a1)
}
#[inline(always)]
fn all_true_mask16x32(self, a: mask16x32<Self>) -> bool {
let (a0, a1) = self.split_mask16x32(a);
self.all_true_mask16x16(a0) && self.all_true_mask16x16(a1)
}
#[inline(always)]
fn any_false_mask16x32(self, a: mask16x32<Self>) -> bool {
let (a0, a1) = self.split_mask16x32(a);
self.any_false_mask16x16(a0) || self.any_false_mask16x16(a1)
}
#[inline(always)]
fn all_false_mask16x32(self, a: mask16x32<Self>) -> bool {
let (a0, a1) = self.split_mask16x32(a);
self.all_false_mask16x16(a0) && self.all_false_mask16x16(a1)
}
#[inline(always)]
fn split_mask16x32(self, a: mask16x32<Self>) -> (mask16x16<Self>, mask16x16<Self>) {
let mut b0 = [0; 16usize];
let mut b1 = [0; 16usize];
b0.copy_from_slice(&a.val.0[0..16usize]);
b1.copy_from_slice(&a.val.0[16usize..32usize]);
(b0.simd_into(self), b1.simd_into(self))
}
#[inline(always)]
fn splat_i32x16(self, val: i32) -> i32x16<Self> {
let half = self.splat_i32x8(val);
self.combine_i32x8(half, half)
}
#[inline(always)]
fn load_array_i32x16(self, val: [i32; 16usize]) -> i32x16<Self> {
i32x16 {
val: crate::support::Aligned512(val),
simd: self,
}
}
#[inline(always)]
fn load_array_ref_i32x16(self, val: &[i32; 16usize]) -> i32x16<Self> {
i32x16 {
val: crate::support::Aligned512(*val),
simd: self,
}
}
#[inline(always)]
fn as_array_i32x16(self, a: i32x16<Self>) -> [i32; 16usize] {
a.val.0
}
#[inline(always)]
fn as_array_ref_i32x16(self, a: &i32x16<Self>) -> &[i32; 16usize] {
&a.val.0
}
#[inline(always)]
fn as_array_mut_i32x16(self, a: &mut i32x16<Self>) -> &mut [i32; 16usize] {
&mut a.val.0
}
#[inline(always)]
fn store_array_i32x16(self, a: i32x16<Self>, dest: &mut [i32; 16usize]) -> () {
*dest = a.val.0;
}
#[inline(always)]
fn cvt_from_bytes_i32x16(self, a: u8x64<Self>) -> i32x16<Self> {
unsafe {
i32x16 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_i32x16(self, a: i32x16<Self>) -> u8x64<Self> {
unsafe {
u8x64 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn slide_i32x16<const SHIFT: usize>(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
let mut dest = [Default::default(); 16usize];
dest[..16usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
dest[16usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
dest.simd_into(self)
}
#[inline(always)]
fn slide_within_blocks_i32x16<const SHIFT: usize>(
self,
a: i32x16<Self>,
b: i32x16<Self>,
) -> i32x16<Self> {
let (a0, a1) = self.split_i32x16(a);
let (b0, b1) = self.split_i32x16(b);
self.combine_i32x8(
self.slide_within_blocks_i32x8::<SHIFT>(a0, b0),
self.slide_within_blocks_i32x8::<SHIFT>(a1, b1),
)
}
#[inline(always)]
fn add_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
let (a0, a1) = self.split_i32x16(a);
let (b0, b1) = self.split_i32x16(b);
self.combine_i32x8(self.add_i32x8(a0, b0), self.add_i32x8(a1, b1))
}
#[inline(always)]
fn sub_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
let (a0, a1) = self.split_i32x16(a);
let (b0, b1) = self.split_i32x16(b);
self.combine_i32x8(self.sub_i32x8(a0, b0), self.sub_i32x8(a1, b1))
}
#[inline(always)]
fn mul_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
let (a0, a1) = self.split_i32x16(a);
let (b0, b1) = self.split_i32x16(b);
self.combine_i32x8(self.mul_i32x8(a0, b0), self.mul_i32x8(a1, b1))
}
#[inline(always)]
fn and_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
let (a0, a1) = self.split_i32x16(a);
let (b0, b1) = self.split_i32x16(b);
self.combine_i32x8(self.and_i32x8(a0, b0), self.and_i32x8(a1, b1))
}
#[inline(always)]
fn or_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
let (a0, a1) = self.split_i32x16(a);
let (b0, b1) = self.split_i32x16(b);
self.combine_i32x8(self.or_i32x8(a0, b0), self.or_i32x8(a1, b1))
}
#[inline(always)]
fn xor_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
let (a0, a1) = self.split_i32x16(a);
let (b0, b1) = self.split_i32x16(b);
self.combine_i32x8(self.xor_i32x8(a0, b0), self.xor_i32x8(a1, b1))
}
#[inline(always)]
fn not_i32x16(self, a: i32x16<Self>) -> i32x16<Self> {
let (a0, a1) = self.split_i32x16(a);
self.combine_i32x8(self.not_i32x8(a0), self.not_i32x8(a1))
}
#[inline(always)]
fn shl_i32x16(self, a: i32x16<Self>, shift: u32) -> i32x16<Self> {
let (a0, a1) = self.split_i32x16(a);
self.combine_i32x8(self.shl_i32x8(a0, shift), self.shl_i32x8(a1, shift))
}
#[inline(always)]
fn shlv_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
let (a0, a1) = self.split_i32x16(a);
let (b0, b1) = self.split_i32x16(b);
self.combine_i32x8(self.shlv_i32x8(a0, b0), self.shlv_i32x8(a1, b1))
}
#[inline(always)]
fn shr_i32x16(self, a: i32x16<Self>, shift: u32) -> i32x16<Self> {
let (a0, a1) = self.split_i32x16(a);
self.combine_i32x8(self.shr_i32x8(a0, shift), self.shr_i32x8(a1, shift))
}
#[inline(always)]
fn shrv_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
let (a0, a1) = self.split_i32x16(a);
let (b0, b1) = self.split_i32x16(b);
self.combine_i32x8(self.shrv_i32x8(a0, b0), self.shrv_i32x8(a1, b1))
}
#[inline(always)]
fn simd_eq_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> mask32x16<Self> {
let (a0, a1) = self.split_i32x16(a);
let (b0, b1) = self.split_i32x16(b);
self.combine_mask32x8(self.simd_eq_i32x8(a0, b0), self.simd_eq_i32x8(a1, b1))
}
#[inline(always)]
fn simd_lt_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> mask32x16<Self> {
let (a0, a1) = self.split_i32x16(a);
let (b0, b1) = self.split_i32x16(b);
self.combine_mask32x8(self.simd_lt_i32x8(a0, b0), self.simd_lt_i32x8(a1, b1))
}
#[inline(always)]
fn simd_le_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> mask32x16<Self> {
let (a0, a1) = self.split_i32x16(a);
let (b0, b1) = self.split_i32x16(b);
self.combine_mask32x8(self.simd_le_i32x8(a0, b0), self.simd_le_i32x8(a1, b1))
}
#[inline(always)]
fn simd_ge_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> mask32x16<Self> {
let (a0, a1) = self.split_i32x16(a);
let (b0, b1) = self.split_i32x16(b);
self.combine_mask32x8(self.simd_ge_i32x8(a0, b0), self.simd_ge_i32x8(a1, b1))
}
#[inline(always)]
fn simd_gt_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> mask32x16<Self> {
let (a0, a1) = self.split_i32x16(a);
let (b0, b1) = self.split_i32x16(b);
self.combine_mask32x8(self.simd_gt_i32x8(a0, b0), self.simd_gt_i32x8(a1, b1))
}
#[inline(always)]
fn zip_low_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
let (a0, _) = self.split_i32x16(a);
let (b0, _) = self.split_i32x16(b);
self.combine_i32x8(self.zip_low_i32x8(a0, b0), self.zip_high_i32x8(a0, b0))
}
#[inline(always)]
fn zip_high_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
let (_, a1) = self.split_i32x16(a);
let (_, b1) = self.split_i32x16(b);
self.combine_i32x8(self.zip_low_i32x8(a1, b1), self.zip_high_i32x8(a1, b1))
}
#[inline(always)]
fn unzip_low_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
let (a0, a1) = self.split_i32x16(a);
let (b0, b1) = self.split_i32x16(b);
self.combine_i32x8(self.unzip_low_i32x8(a0, a1), self.unzip_low_i32x8(b0, b1))
}
#[inline(always)]
fn unzip_high_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
let (a0, a1) = self.split_i32x16(a);
let (b0, b1) = self.split_i32x16(b);
self.combine_i32x8(self.unzip_high_i32x8(a0, a1), self.unzip_high_i32x8(b0, b1))
}
#[inline(always)]
fn select_i32x16(self, a: mask32x16<Self>, b: i32x16<Self>, c: i32x16<Self>) -> i32x16<Self> {
let (a0, a1) = self.split_mask32x16(a);
let (b0, b1) = self.split_i32x16(b);
let (c0, c1) = self.split_i32x16(c);
self.combine_i32x8(self.select_i32x8(a0, b0, c0), self.select_i32x8(a1, b1, c1))
}
#[inline(always)]
fn min_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
let (a0, a1) = self.split_i32x16(a);
let (b0, b1) = self.split_i32x16(b);
self.combine_i32x8(self.min_i32x8(a0, b0), self.min_i32x8(a1, b1))
}
#[inline(always)]
fn max_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
let (a0, a1) = self.split_i32x16(a);
let (b0, b1) = self.split_i32x16(b);
self.combine_i32x8(self.max_i32x8(a0, b0), self.max_i32x8(a1, b1))
}
#[inline(always)]
fn split_i32x16(self, a: i32x16<Self>) -> (i32x8<Self>, i32x8<Self>) {
let mut b0 = [0; 8usize];
let mut b1 = [0; 8usize];
b0.copy_from_slice(&a.val.0[0..8usize]);
b1.copy_from_slice(&a.val.0[8usize..16usize]);
(b0.simd_into(self), b1.simd_into(self))
}
#[inline(always)]
fn neg_i32x16(self, a: i32x16<Self>) -> i32x16<Self> {
let (a0, a1) = self.split_i32x16(a);
self.combine_i32x8(self.neg_i32x8(a0), self.neg_i32x8(a1))
}
#[inline(always)]
fn reinterpret_u8_i32x16(self, a: i32x16<Self>) -> u8x64<Self> {
let (a0, a1) = self.split_i32x16(a);
self.combine_u8x32(self.reinterpret_u8_i32x8(a0), self.reinterpret_u8_i32x8(a1))
}
#[inline(always)]
fn reinterpret_u32_i32x16(self, a: i32x16<Self>) -> u32x16<Self> {
let (a0, a1) = self.split_i32x16(a);
self.combine_u32x8(
self.reinterpret_u32_i32x8(a0),
self.reinterpret_u32_i32x8(a1),
)
}
#[inline(always)]
fn cvt_f32_i32x16(self, a: i32x16<Self>) -> f32x16<Self> {
let (a0, a1) = self.split_i32x16(a);
self.combine_f32x8(self.cvt_f32_i32x8(a0), self.cvt_f32_i32x8(a1))
}
#[inline(always)]
fn splat_u32x16(self, val: u32) -> u32x16<Self> {
let half = self.splat_u32x8(val);
self.combine_u32x8(half, half)
}
#[inline(always)]
fn load_array_u32x16(self, val: [u32; 16usize]) -> u32x16<Self> {
u32x16 {
val: crate::support::Aligned512(val),
simd: self,
}
}
#[inline(always)]
fn load_array_ref_u32x16(self, val: &[u32; 16usize]) -> u32x16<Self> {
u32x16 {
val: crate::support::Aligned512(*val),
simd: self,
}
}
#[inline(always)]
fn as_array_u32x16(self, a: u32x16<Self>) -> [u32; 16usize] {
a.val.0
}
#[inline(always)]
fn as_array_ref_u32x16(self, a: &u32x16<Self>) -> &[u32; 16usize] {
&a.val.0
}
#[inline(always)]
fn as_array_mut_u32x16(self, a: &mut u32x16<Self>) -> &mut [u32; 16usize] {
&mut a.val.0
}
#[inline(always)]
fn store_array_u32x16(self, a: u32x16<Self>, dest: &mut [u32; 16usize]) -> () {
*dest = a.val.0;
}
#[inline(always)]
fn cvt_from_bytes_u32x16(self, a: u8x64<Self>) -> u32x16<Self> {
unsafe {
u32x16 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_u32x16(self, a: u32x16<Self>) -> u8x64<Self> {
unsafe {
u8x64 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn slide_u32x16<const SHIFT: usize>(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
let mut dest = [Default::default(); 16usize];
dest[..16usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
dest[16usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
dest.simd_into(self)
}
#[inline(always)]
fn slide_within_blocks_u32x16<const SHIFT: usize>(
self,
a: u32x16<Self>,
b: u32x16<Self>,
) -> u32x16<Self> {
let (a0, a1) = self.split_u32x16(a);
let (b0, b1) = self.split_u32x16(b);
self.combine_u32x8(
self.slide_within_blocks_u32x8::<SHIFT>(a0, b0),
self.slide_within_blocks_u32x8::<SHIFT>(a1, b1),
)
}
#[inline(always)]
fn add_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
let (a0, a1) = self.split_u32x16(a);
let (b0, b1) = self.split_u32x16(b);
self.combine_u32x8(self.add_u32x8(a0, b0), self.add_u32x8(a1, b1))
}
#[inline(always)]
fn sub_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
let (a0, a1) = self.split_u32x16(a);
let (b0, b1) = self.split_u32x16(b);
self.combine_u32x8(self.sub_u32x8(a0, b0), self.sub_u32x8(a1, b1))
}
#[inline(always)]
fn mul_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
let (a0, a1) = self.split_u32x16(a);
let (b0, b1) = self.split_u32x16(b);
self.combine_u32x8(self.mul_u32x8(a0, b0), self.mul_u32x8(a1, b1))
}
#[inline(always)]
fn and_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
let (a0, a1) = self.split_u32x16(a);
let (b0, b1) = self.split_u32x16(b);
self.combine_u32x8(self.and_u32x8(a0, b0), self.and_u32x8(a1, b1))
}
#[inline(always)]
fn or_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
let (a0, a1) = self.split_u32x16(a);
let (b0, b1) = self.split_u32x16(b);
self.combine_u32x8(self.or_u32x8(a0, b0), self.or_u32x8(a1, b1))
}
#[inline(always)]
fn xor_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
let (a0, a1) = self.split_u32x16(a);
let (b0, b1) = self.split_u32x16(b);
self.combine_u32x8(self.xor_u32x8(a0, b0), self.xor_u32x8(a1, b1))
}
#[inline(always)]
fn not_u32x16(self, a: u32x16<Self>) -> u32x16<Self> {
let (a0, a1) = self.split_u32x16(a);
self.combine_u32x8(self.not_u32x8(a0), self.not_u32x8(a1))
}
#[inline(always)]
fn shl_u32x16(self, a: u32x16<Self>, shift: u32) -> u32x16<Self> {
let (a0, a1) = self.split_u32x16(a);
self.combine_u32x8(self.shl_u32x8(a0, shift), self.shl_u32x8(a1, shift))
}
#[inline(always)]
fn shlv_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
let (a0, a1) = self.split_u32x16(a);
let (b0, b1) = self.split_u32x16(b);
self.combine_u32x8(self.shlv_u32x8(a0, b0), self.shlv_u32x8(a1, b1))
}
#[inline(always)]
fn shr_u32x16(self, a: u32x16<Self>, shift: u32) -> u32x16<Self> {
let (a0, a1) = self.split_u32x16(a);
self.combine_u32x8(self.shr_u32x8(a0, shift), self.shr_u32x8(a1, shift))
}
#[inline(always)]
fn shrv_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
let (a0, a1) = self.split_u32x16(a);
let (b0, b1) = self.split_u32x16(b);
self.combine_u32x8(self.shrv_u32x8(a0, b0), self.shrv_u32x8(a1, b1))
}
#[inline(always)]
fn simd_eq_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> mask32x16<Self> {
let (a0, a1) = self.split_u32x16(a);
let (b0, b1) = self.split_u32x16(b);
self.combine_mask32x8(self.simd_eq_u32x8(a0, b0), self.simd_eq_u32x8(a1, b1))
}
#[inline(always)]
fn simd_lt_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> mask32x16<Self> {
let (a0, a1) = self.split_u32x16(a);
let (b0, b1) = self.split_u32x16(b);
self.combine_mask32x8(self.simd_lt_u32x8(a0, b0), self.simd_lt_u32x8(a1, b1))
}
#[inline(always)]
fn simd_le_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> mask32x16<Self> {
let (a0, a1) = self.split_u32x16(a);
let (b0, b1) = self.split_u32x16(b);
self.combine_mask32x8(self.simd_le_u32x8(a0, b0), self.simd_le_u32x8(a1, b1))
}
#[inline(always)]
fn simd_ge_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> mask32x16<Self> {
let (a0, a1) = self.split_u32x16(a);
let (b0, b1) = self.split_u32x16(b);
self.combine_mask32x8(self.simd_ge_u32x8(a0, b0), self.simd_ge_u32x8(a1, b1))
}
#[inline(always)]
fn simd_gt_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> mask32x16<Self> {
let (a0, a1) = self.split_u32x16(a);
let (b0, b1) = self.split_u32x16(b);
self.combine_mask32x8(self.simd_gt_u32x8(a0, b0), self.simd_gt_u32x8(a1, b1))
}
#[inline(always)]
fn zip_low_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
let (a0, _) = self.split_u32x16(a);
let (b0, _) = self.split_u32x16(b);
self.combine_u32x8(self.zip_low_u32x8(a0, b0), self.zip_high_u32x8(a0, b0))
}
#[inline(always)]
fn zip_high_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
let (_, a1) = self.split_u32x16(a);
let (_, b1) = self.split_u32x16(b);
self.combine_u32x8(self.zip_low_u32x8(a1, b1), self.zip_high_u32x8(a1, b1))
}
#[inline(always)]
fn unzip_low_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
let (a0, a1) = self.split_u32x16(a);
let (b0, b1) = self.split_u32x16(b);
self.combine_u32x8(self.unzip_low_u32x8(a0, a1), self.unzip_low_u32x8(b0, b1))
}
#[inline(always)]
fn unzip_high_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
let (a0, a1) = self.split_u32x16(a);
let (b0, b1) = self.split_u32x16(b);
self.combine_u32x8(self.unzip_high_u32x8(a0, a1), self.unzip_high_u32x8(b0, b1))
}
#[inline(always)]
fn select_u32x16(self, a: mask32x16<Self>, b: u32x16<Self>, c: u32x16<Self>) -> u32x16<Self> {
let (a0, a1) = self.split_mask32x16(a);
let (b0, b1) = self.split_u32x16(b);
let (c0, c1) = self.split_u32x16(c);
self.combine_u32x8(self.select_u32x8(a0, b0, c0), self.select_u32x8(a1, b1, c1))
}
#[inline(always)]
fn min_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
let (a0, a1) = self.split_u32x16(a);
let (b0, b1) = self.split_u32x16(b);
self.combine_u32x8(self.min_u32x8(a0, b0), self.min_u32x8(a1, b1))
}
#[inline(always)]
fn max_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
let (a0, a1) = self.split_u32x16(a);
let (b0, b1) = self.split_u32x16(b);
self.combine_u32x8(self.max_u32x8(a0, b0), self.max_u32x8(a1, b1))
}
#[inline(always)]
fn split_u32x16(self, a: u32x16<Self>) -> (u32x8<Self>, u32x8<Self>) {
let mut b0 = [0; 8usize];
let mut b1 = [0; 8usize];
b0.copy_from_slice(&a.val.0[0..8usize]);
b1.copy_from_slice(&a.val.0[8usize..16usize]);
(b0.simd_into(self), b1.simd_into(self))
}
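// At u32 granularity the interleaved load is a 4x4 transpose
// (lane `4 * s + k` <- `src[4 * k + s]`). The transpose is an involution and
// the store below uses the same index pattern, so the two invert each other.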
#[inline(always)]
fn load_interleaved_128_u32x16(self, src: &[u32; 16usize]) -> u32x16<Self> {
[
src[0usize],
src[4usize],
src[8usize],
src[12usize],
src[1usize],
src[5usize],
src[9usize],
src[13usize],
src[2usize],
src[6usize],
src[10usize],
src[14usize],
src[3usize],
src[7usize],
src[11usize],
src[15usize],
]
.simd_into(self)
}
#[inline(always)]
fn store_interleaved_128_u32x16(self, a: u32x16<Self>, dest: &mut [u32; 16usize]) -> () {
*dest = [
a[0usize], a[4usize], a[8usize], a[12usize], a[1usize], a[5usize], a[9usize],
a[13usize], a[2usize], a[6usize], a[10usize], a[14usize], a[3usize], a[7usize],
a[11usize], a[15usize],
];
}
#[inline(always)]
fn reinterpret_u8_u32x16(self, a: u32x16<Self>) -> u8x64<Self> {
let (a0, a1) = self.split_u32x16(a);
self.combine_u8x32(self.reinterpret_u8_u32x8(a0), self.reinterpret_u8_u32x8(a1))
}
#[inline(always)]
fn cvt_f32_u32x16(self, a: u32x16<Self>) -> f32x16<Self> {
let (a0, a1) = self.split_u32x16(a);
self.combine_f32x8(self.cvt_f32_u32x8(a0), self.cvt_f32_u32x8(a1))
}
#[inline(always)]
fn splat_mask32x16(self, val: i32) -> mask32x16<Self> {
let half = self.splat_mask32x8(val);
self.combine_mask32x8(half, half)
}
#[inline(always)]
fn load_array_mask32x16(self, val: [i32; 16usize]) -> mask32x16<Self> {
mask32x16 {
val: crate::support::Aligned512(val),
simd: self,
}
}
#[inline(always)]
fn load_array_ref_mask32x16(self, val: &[i32; 16usize]) -> mask32x16<Self> {
mask32x16 {
val: crate::support::Aligned512(*val),
simd: self,
}
}
#[inline(always)]
fn as_array_mask32x16(self, a: mask32x16<Self>) -> [i32; 16usize] {
a.val.0
}
#[inline(always)]
fn as_array_ref_mask32x16(self, a: &mask32x16<Self>) -> &[i32; 16usize] {
&a.val.0
}
#[inline(always)]
fn as_array_mut_mask32x16(self, a: &mut mask32x16<Self>) -> &mut [i32; 16usize] {
&mut a.val.0
}
#[inline(always)]
fn store_array_mask32x16(self, a: mask32x16<Self>, dest: &mut [i32; 16usize]) -> () {
*dest = a.val.0;
}
#[inline(always)]
fn cvt_from_bytes_mask32x16(self, a: u8x64<Self>) -> mask32x16<Self> {
unsafe {
mask32x16 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_mask32x16(self, a: mask32x16<Self>) -> u8x64<Self> {
unsafe {
u8x64 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn slide_mask32x16<const SHIFT: usize>(
self,
a: mask32x16<Self>,
b: mask32x16<Self>,
) -> mask32x16<Self> {
let mut dest = [Default::default(); 16usize];
dest[..16usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
dest[16usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
dest.simd_into(self)
}
#[inline(always)]
fn slide_within_blocks_mask32x16<const SHIFT: usize>(
self,
a: mask32x16<Self>,
b: mask32x16<Self>,
) -> mask32x16<Self> {
let (a0, a1) = self.split_mask32x16(a);
let (b0, b1) = self.split_mask32x16(b);
self.combine_mask32x8(
self.slide_within_blocks_mask32x8::<SHIFT>(a0, b0),
self.slide_within_blocks_mask32x8::<SHIFT>(a1, b1),
)
}
#[inline(always)]
fn and_mask32x16(self, a: mask32x16<Self>, b: mask32x16<Self>) -> mask32x16<Self> {
let (a0, a1) = self.split_mask32x16(a);
let (b0, b1) = self.split_mask32x16(b);
self.combine_mask32x8(self.and_mask32x8(a0, b0), self.and_mask32x8(a1, b1))
}
#[inline(always)]
fn or_mask32x16(self, a: mask32x16<Self>, b: mask32x16<Self>) -> mask32x16<Self> {
let (a0, a1) = self.split_mask32x16(a);
let (b0, b1) = self.split_mask32x16(b);
self.combine_mask32x8(self.or_mask32x8(a0, b0), self.or_mask32x8(a1, b1))
}
#[inline(always)]
fn xor_mask32x16(self, a: mask32x16<Self>, b: mask32x16<Self>) -> mask32x16<Self> {
let (a0, a1) = self.split_mask32x16(a);
let (b0, b1) = self.split_mask32x16(b);
self.combine_mask32x8(self.xor_mask32x8(a0, b0), self.xor_mask32x8(a1, b1))
}
#[inline(always)]
fn not_mask32x16(self, a: mask32x16<Self>) -> mask32x16<Self> {
let (a0, a1) = self.split_mask32x16(a);
self.combine_mask32x8(self.not_mask32x8(a0), self.not_mask32x8(a1))
}
#[inline(always)]
fn select_mask32x16(
self,
a: mask32x16<Self>,
b: mask32x16<Self>,
c: mask32x16<Self>,
) -> mask32x16<Self> {
let (a0, a1) = self.split_mask32x16(a);
let (b0, b1) = self.split_mask32x16(b);
let (c0, c1) = self.split_mask32x16(c);
self.combine_mask32x8(
self.select_mask32x8(a0, b0, c0),
self.select_mask32x8(a1, b1, c1),
)
}
#[inline(always)]
fn simd_eq_mask32x16(self, a: mask32x16<Self>, b: mask32x16<Self>) -> mask32x16<Self> {
let (a0, a1) = self.split_mask32x16(a);
let (b0, b1) = self.split_mask32x16(b);
self.combine_mask32x8(self.simd_eq_mask32x8(a0, b0), self.simd_eq_mask32x8(a1, b1))
}
#[inline(always)]
fn any_true_mask32x16(self, a: mask32x16<Self>) -> bool {
let (a0, a1) = self.split_mask32x16(a);
self.any_true_mask32x8(a0) || self.any_true_mask32x8(a1)
}
#[inline(always)]
fn all_true_mask32x16(self, a: mask32x16<Self>) -> bool {
let (a0, a1) = self.split_mask32x16(a);
self.all_true_mask32x8(a0) && self.all_true_mask32x8(a1)
}
#[inline(always)]
fn any_false_mask32x16(self, a: mask32x16<Self>) -> bool {
let (a0, a1) = self.split_mask32x16(a);
self.any_false_mask32x8(a0) || self.any_false_mask32x8(a1)
}
#[inline(always)]
fn all_false_mask32x16(self, a: mask32x16<Self>) -> bool {
let (a0, a1) = self.split_mask32x16(a);
self.all_false_mask32x8(a0) && self.all_false_mask32x8(a1)
}
#[inline(always)]
fn split_mask32x16(self, a: mask32x16<Self>) -> (mask32x8<Self>, mask32x8<Self>) {
let mut b0 = [0; 8usize];
let mut b1 = [0; 8usize];
b0.copy_from_slice(&a.val.0[0..8usize]);
b1.copy_from_slice(&a.val.0[8usize..16usize]);
(b0.simd_into(self), b1.simd_into(self))
}
#[inline(always)]
fn splat_f64x8(self, val: f64) -> f64x8<Self> {
let half = self.splat_f64x4(val);
self.combine_f64x4(half, half)
}
#[inline(always)]
fn load_array_f64x8(self, val: [f64; 8usize]) -> f64x8<Self> {
f64x8 {
val: crate::support::Aligned512(val),
simd: self,
}
}
#[inline(always)]
fn load_array_ref_f64x8(self, val: &[f64; 8usize]) -> f64x8<Self> {
f64x8 {
val: crate::support::Aligned512(*val),
simd: self,
}
}
#[inline(always)]
fn as_array_f64x8(self, a: f64x8<Self>) -> [f64; 8usize] {
a.val.0
}
#[inline(always)]
fn as_array_ref_f64x8(self, a: &f64x8<Self>) -> &[f64; 8usize] {
&a.val.0
}
#[inline(always)]
fn as_array_mut_f64x8(self, a: &mut f64x8<Self>) -> &mut [f64; 8usize] {
&mut a.val.0
}
#[inline(always)]
fn store_array_f64x8(self, a: f64x8<Self>, dest: &mut [f64; 8usize]) -> () {
*dest = a.val.0;
}
#[inline(always)]
fn cvt_from_bytes_f64x8(self, a: u8x64<Self>) -> f64x8<Self> {
unsafe {
f64x8 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_f64x8(self, a: f64x8<Self>) -> u8x64<Self> {
unsafe {
u8x64 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn slide_f64x8<const SHIFT: usize>(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
let mut dest = [Default::default(); 8usize];
dest[..8usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
dest[8usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
dest.simd_into(self)
}
#[inline(always)]
fn slide_within_blocks_f64x8<const SHIFT: usize>(
self,
a: f64x8<Self>,
b: f64x8<Self>,
) -> f64x8<Self> {
let (a0, a1) = self.split_f64x8(a);
let (b0, b1) = self.split_f64x8(b);
self.combine_f64x4(
self.slide_within_blocks_f64x4::<SHIFT>(a0, b0),
self.slide_within_blocks_f64x4::<SHIFT>(a1, b1),
)
}
#[inline(always)]
fn abs_f64x8(self, a: f64x8<Self>) -> f64x8<Self> {
let (a0, a1) = self.split_f64x8(a);
self.combine_f64x4(self.abs_f64x4(a0), self.abs_f64x4(a1))
}
#[inline(always)]
fn neg_f64x8(self, a: f64x8<Self>) -> f64x8<Self> {
let (a0, a1) = self.split_f64x8(a);
self.combine_f64x4(self.neg_f64x4(a0), self.neg_f64x4(a1))
}
#[inline(always)]
fn sqrt_f64x8(self, a: f64x8<Self>) -> f64x8<Self> {
let (a0, a1) = self.split_f64x8(a);
self.combine_f64x4(self.sqrt_f64x4(a0), self.sqrt_f64x4(a1))
}
#[inline(always)]
fn add_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
let (a0, a1) = self.split_f64x8(a);
let (b0, b1) = self.split_f64x8(b);
self.combine_f64x4(self.add_f64x4(a0, b0), self.add_f64x4(a1, b1))
}
#[inline(always)]
fn sub_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
let (a0, a1) = self.split_f64x8(a);
let (b0, b1) = self.split_f64x8(b);
self.combine_f64x4(self.sub_f64x4(a0, b0), self.sub_f64x4(a1, b1))
}
#[inline(always)]
fn mul_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
let (a0, a1) = self.split_f64x8(a);
let (b0, b1) = self.split_f64x8(b);
self.combine_f64x4(self.mul_f64x4(a0, b0), self.mul_f64x4(a1, b1))
}
#[inline(always)]
fn div_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
let (a0, a1) = self.split_f64x8(a);
let (b0, b1) = self.split_f64x8(b);
self.combine_f64x4(self.div_f64x4(a0, b0), self.div_f64x4(a1, b1))
}
#[inline(always)]
fn copysign_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
let (a0, a1) = self.split_f64x8(a);
let (b0, b1) = self.split_f64x8(b);
self.combine_f64x4(self.copysign_f64x4(a0, b0), self.copysign_f64x4(a1, b1))
}
#[inline(always)]
fn simd_eq_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> mask64x8<Self> {
let (a0, a1) = self.split_f64x8(a);
let (b0, b1) = self.split_f64x8(b);
self.combine_mask64x4(self.simd_eq_f64x4(a0, b0), self.simd_eq_f64x4(a1, b1))
}
#[inline(always)]
fn simd_lt_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> mask64x8<Self> {
let (a0, a1) = self.split_f64x8(a);
let (b0, b1) = self.split_f64x8(b);
self.combine_mask64x4(self.simd_lt_f64x4(a0, b0), self.simd_lt_f64x4(a1, b1))
}
#[inline(always)]
fn simd_le_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> mask64x8<Self> {
let (a0, a1) = self.split_f64x8(a);
let (b0, b1) = self.split_f64x8(b);
self.combine_mask64x4(self.simd_le_f64x4(a0, b0), self.simd_le_f64x4(a1, b1))
}
#[inline(always)]
fn simd_ge_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> mask64x8<Self> {
let (a0, a1) = self.split_f64x8(a);
let (b0, b1) = self.split_f64x8(b);
self.combine_mask64x4(self.simd_ge_f64x4(a0, b0), self.simd_ge_f64x4(a1, b1))
}
#[inline(always)]
fn simd_gt_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> mask64x8<Self> {
let (a0, a1) = self.split_f64x8(a);
let (b0, b1) = self.split_f64x8(b);
self.combine_mask64x4(self.simd_gt_f64x4(a0, b0), self.simd_gt_f64x4(a1, b1))
}
#[inline(always)]
fn zip_low_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
let (a0, _) = self.split_f64x8(a);
let (b0, _) = self.split_f64x8(b);
self.combine_f64x4(self.zip_low_f64x4(a0, b0), self.zip_high_f64x4(a0, b0))
}
#[inline(always)]
fn zip_high_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
let (_, a1) = self.split_f64x8(a);
let (_, b1) = self.split_f64x8(b);
self.combine_f64x4(self.zip_low_f64x4(a1, b1), self.zip_high_f64x4(a1, b1))
}
#[inline(always)]
fn unzip_low_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
let (a0, a1) = self.split_f64x8(a);
let (b0, b1) = self.split_f64x8(b);
self.combine_f64x4(self.unzip_low_f64x4(a0, a1), self.unzip_low_f64x4(b0, b1))
}
#[inline(always)]
fn unzip_high_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
let (a0, a1) = self.split_f64x8(a);
let (b0, b1) = self.split_f64x8(b);
self.combine_f64x4(self.unzip_high_f64x4(a0, a1), self.unzip_high_f64x4(b0, b1))
}
#[inline(always)]
fn max_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
let (a0, a1) = self.split_f64x8(a);
let (b0, b1) = self.split_f64x8(b);
self.combine_f64x4(self.max_f64x4(a0, b0), self.max_f64x4(a1, b1))
}
#[inline(always)]
fn min_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
let (a0, a1) = self.split_f64x8(a);
let (b0, b1) = self.split_f64x8(b);
self.combine_f64x4(self.min_f64x4(a0, b0), self.min_f64x4(a1, b1))
}
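// The `_precise` variants delegate like everything else; how they differ
// from the plain `min`/`max` above (typically stricter IEEE NaN and signed
// zero handling) is determined entirely by the f64x4 implementations.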
#[inline(always)]
fn max_precise_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
let (a0, a1) = self.split_f64x8(a);
let (b0, b1) = self.split_f64x8(b);
self.combine_f64x4(
self.max_precise_f64x4(a0, b0),
self.max_precise_f64x4(a1, b1),
)
}
#[inline(always)]
fn min_precise_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
let (a0, a1) = self.split_f64x8(a);
let (b0, b1) = self.split_f64x8(b);
self.combine_f64x4(
self.min_precise_f64x4(a0, b0),
self.min_precise_f64x4(a1, b1),
)
}
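// `mul_add` computes `a * b + c` per lane; whether that is actually fused to
// a single rounding depends on the f64x4 fallback underneath.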
#[inline(always)]
fn mul_add_f64x8(self, a: f64x8<Self>, b: f64x8<Self>, c: f64x8<Self>) -> f64x8<Self> {
let (a0, a1) = self.split_f64x8(a);
let (b0, b1) = self.split_f64x8(b);
let (c0, c1) = self.split_f64x8(c);
self.combine_f64x4(
self.mul_add_f64x4(a0, b0, c0),
self.mul_add_f64x4(a1, b1, c1),
)
}
#[inline(always)]
fn mul_sub_f64x8(self, a: f64x8<Self>, b: f64x8<Self>, c: f64x8<Self>) -> f64x8<Self> {
let (a0, a1) = self.split_f64x8(a);
let (b0, b1) = self.split_f64x8(b);
let (c0, c1) = self.split_f64x8(c);
self.combine_f64x4(
self.mul_sub_f64x4(a0, b0, c0),
self.mul_sub_f64x4(a1, b1, c1),
)
}
#[inline(always)]
fn floor_f64x8(self, a: f64x8<Self>) -> f64x8<Self> {
let (a0, a1) = self.split_f64x8(a);
self.combine_f64x4(self.floor_f64x4(a0), self.floor_f64x4(a1))
}
#[inline(always)]
fn ceil_f64x8(self, a: f64x8<Self>) -> f64x8<Self> {
let (a0, a1) = self.split_f64x8(a);
self.combine_f64x4(self.ceil_f64x4(a0), self.ceil_f64x4(a1))
}
#[inline(always)]
fn round_ties_even_f64x8(self, a: f64x8<Self>) -> f64x8<Self> {
let (a0, a1) = self.split_f64x8(a);
self.combine_f64x4(
self.round_ties_even_f64x4(a0),
self.round_ties_even_f64x4(a1),
)
}
#[inline(always)]
fn fract_f64x8(self, a: f64x8<Self>) -> f64x8<Self> {
let (a0, a1) = self.split_f64x8(a);
self.combine_f64x4(self.fract_f64x4(a0), self.fract_f64x4(a1))
}
#[inline(always)]
fn trunc_f64x8(self, a: f64x8<Self>) -> f64x8<Self> {
let (a0, a1) = self.split_f64x8(a);
self.combine_f64x4(self.trunc_f64x4(a0), self.trunc_f64x4(a1))
}
#[inline(always)]
fn select_f64x8(self, a: mask64x8<Self>, b: f64x8<Self>, c: f64x8<Self>) -> f64x8<Self> {
let (a0, a1) = self.split_mask64x8(a);
let (b0, b1) = self.split_f64x8(b);
let (c0, c1) = self.split_f64x8(c);
self.combine_f64x4(self.select_f64x4(a0, b0, c0), self.select_f64x4(a1, b1, c1))
}
#[inline(always)]
fn split_f64x8(self, a: f64x8<Self>) -> (f64x4<Self>, f64x4<Self>) {
let mut b0 = [0.0; 4usize];
let mut b1 = [0.0; 4usize];
b0.copy_from_slice(&a.val.0[0..4usize]);
b1.copy_from_slice(&a.val.0[4usize..8usize]);
(b0.simd_into(self), b1.simd_into(self))
}
#[inline(always)]
fn reinterpret_f32_f64x8(self, a: f64x8<Self>) -> f32x16<Self> {
let (a0, a1) = self.split_f64x8(a);
self.combine_f32x8(
self.reinterpret_f32_f64x4(a0),
self.reinterpret_f32_f64x4(a1),
)
}
#[inline(always)]
fn splat_mask64x8(self, val: i64) -> mask64x8<Self> {
let half = self.splat_mask64x4(val);
self.combine_mask64x4(half, half)
}
#[inline(always)]
fn load_array_mask64x8(self, val: [i64; 8usize]) -> mask64x8<Self> {
mask64x8 {
val: crate::support::Aligned512(val),
simd: self,
}
}
#[inline(always)]
fn load_array_ref_mask64x8(self, val: &[i64; 8usize]) -> mask64x8<Self> {
mask64x8 {
val: crate::support::Aligned512(*val),
simd: self,
}
}
#[inline(always)]
fn as_array_mask64x8(self, a: mask64x8<Self>) -> [i64; 8usize] {
a.val.0
}
#[inline(always)]
fn as_array_ref_mask64x8(self, a: &mask64x8<Self>) -> &[i64; 8usize] {
&a.val.0
}
#[inline(always)]
fn as_array_mut_mask64x8(self, a: &mut mask64x8<Self>) -> &mut [i64; 8usize] {
&mut a.val.0
}
#[inline(always)]
fn store_array_mask64x8(self, a: mask64x8<Self>, dest: &mut [i64; 8usize]) -> () {
*dest = a.val.0;
}
#[inline(always)]
fn cvt_from_bytes_mask64x8(self, a: u8x64<Self>) -> mask64x8<Self> {
unsafe {
mask64x8 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_mask64x8(self, a: mask64x8<Self>) -> u8x64<Self> {
unsafe {
u8x64 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn slide_mask64x8<const SHIFT: usize>(
self,
a: mask64x8<Self>,
b: mask64x8<Self>,
) -> mask64x8<Self> {
let mut dest = [Default::default(); 8usize];
dest[..8usize - SHIFT].copy_from_slice(&a.val.0[SHIFT..]);
dest[8usize - SHIFT..].copy_from_slice(&b.val.0[..SHIFT]);
dest.simd_into(self)
}
#[inline(always)]
fn slide_within_blocks_mask64x8<const SHIFT: usize>(
self,
a: mask64x8<Self>,
b: mask64x8<Self>,
) -> mask64x8<Self> {
let (a0, a1) = self.split_mask64x8(a);
let (b0, b1) = self.split_mask64x8(b);
self.combine_mask64x4(
self.slide_within_blocks_mask64x4::<SHIFT>(a0, b0),
self.slide_within_blocks_mask64x4::<SHIFT>(a1, b1),
)
}
#[inline(always)]
fn and_mask64x8(self, a: mask64x8<Self>, b: mask64x8<Self>) -> mask64x8<Self> {
let (a0, a1) = self.split_mask64x8(a);
let (b0, b1) = self.split_mask64x8(b);
self.combine_mask64x4(self.and_mask64x4(a0, b0), self.and_mask64x4(a1, b1))
}
#[inline(always)]
fn or_mask64x8(self, a: mask64x8<Self>, b: mask64x8<Self>) -> mask64x8<Self> {
let (a0, a1) = self.split_mask64x8(a);
let (b0, b1) = self.split_mask64x8(b);
self.combine_mask64x4(self.or_mask64x4(a0, b0), self.or_mask64x4(a1, b1))
}
#[inline(always)]
fn xor_mask64x8(self, a: mask64x8<Self>, b: mask64x8<Self>) -> mask64x8<Self> {
let (a0, a1) = self.split_mask64x8(a);
let (b0, b1) = self.split_mask64x8(b);
self.combine_mask64x4(self.xor_mask64x4(a0, b0), self.xor_mask64x4(a1, b1))
}
#[inline(always)]
fn not_mask64x8(self, a: mask64x8<Self>) -> mask64x8<Self> {
let (a0, a1) = self.split_mask64x8(a);
self.combine_mask64x4(self.not_mask64x4(a0), self.not_mask64x4(a1))
}
#[inline(always)]
fn select_mask64x8(
self,
a: mask64x8<Self>,
b: mask64x8<Self>,
c: mask64x8<Self>,
) -> mask64x8<Self> {
let (a0, a1) = self.split_mask64x8(a);
let (b0, b1) = self.split_mask64x8(b);
let (c0, c1) = self.split_mask64x8(c);
self.combine_mask64x4(
self.select_mask64x4(a0, b0, c0),
self.select_mask64x4(a1, b1, c1),
)
}
#[inline(always)]
fn simd_eq_mask64x8(self, a: mask64x8<Self>, b: mask64x8<Self>) -> mask64x8<Self> {
let (a0, a1) = self.split_mask64x8(a);
let (b0, b1) = self.split_mask64x8(b);
self.combine_mask64x4(self.simd_eq_mask64x4(a0, b0), self.simd_eq_mask64x4(a1, b1))
}
#[inline(always)]
fn any_true_mask64x8(self, a: mask64x8<Self>) -> bool {
let (a0, a1) = self.split_mask64x8(a);
self.any_true_mask64x4(a0) || self.any_true_mask64x4(a1)
}
#[inline(always)]
fn all_true_mask64x8(self, a: mask64x8<Self>) -> bool {
let (a0, a1) = self.split_mask64x8(a);
self.all_true_mask64x4(a0) && self.all_true_mask64x4(a1)
}
#[inline(always)]
fn any_false_mask64x8(self, a: mask64x8<Self>) -> bool {
let (a0, a1) = self.split_mask64x8(a);
self.any_false_mask64x4(a0) || self.any_false_mask64x4(a1)
}
#[inline(always)]
fn all_false_mask64x8(self, a: mask64x8<Self>) -> bool {
let (a0, a1) = self.split_mask64x8(a);
self.all_false_mask64x4(a0) && self.all_false_mask64x4(a1)
}
#[inline(always)]
fn split_mask64x8(self, a: mask64x8<Self>) -> (mask64x4<Self>, mask64x4<Self>) {
let mut b0 = [0; 4usize];
let mut b1 = [0; 4usize];
b0.copy_from_slice(&a.val.0[0..4usize]);
b1.copy_from_slice(&a.val.0[4usize..8usize]);
(b0.simd_into(self), b1.simd_into(self))
}
}
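// A minimal sanity sketch for the widest fallback ops above, under the
// assumption that the `Simd` and `SimdInto` traits are reachable through
// `crate::prelude` (which `use super::*` picks up); the expected values
// follow directly from the delegating implementations in this file.
#[cfg(test)]
mod fallback_512_sanity {
use super::*;
#[test]
fn slide_u32x16_concatenates_lanes() {
let simd = Fallback::new();
let a: [u32; 16] = core::array::from_fn(|i| i as u32);
let b: [u32; 16] = core::array::from_fn(|i| 100 + i as u32);
let out = simd.slide_u32x16::<3>(a.simd_into(simd), b.simd_into(simd));
let out = simd.as_array_u32x16(out);
// Lanes 0..13 come from a[3..]; the final three lanes from b[..3].
assert_eq!(&out[..13], &a[3..]);
assert_eq!(&out[13..], &b[..3]);
}
#[test]
fn interleaved_u32x16_load_store_round_trips() {
let simd = Fallback::new();
let src: [u32; 16] = core::array::from_fn(|i| i as u32);
let v = simd.load_interleaved_128_u32x16(&src);
let mut dest = [0u32; 16];
simd.store_interleaved_128_u32x16(v, &mut dest);
// The 4x4 transpose is its own inverse, so the round trip is the
// identity permutation.
assert_eq!(dest, src);
}
}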