use crate::{Level, arch_types::ArchTypes, prelude::*, seal::Seal};
use crate::{
f32x4, f32x8, f32x16, f64x2, f64x4, f64x8, i8x16, i8x32, i8x64, i16x8, i16x16, i16x32, i32x4,
i32x8, i32x16, mask8x16, mask8x32, mask8x64, mask16x8, mask16x16, mask16x32, mask32x4,
mask32x8, mask32x16, mask64x2, mask64x4, mask64x8, u8x16, u8x32, u8x64, u16x8, u16x16, u16x32,
u32x4, u32x8, u32x16,
};
use core::arch::aarch64::*;
#[doc = "The SIMD token for the \"neon\" level."]
#[derive(Clone, Copy, Debug)]
pub struct Neon {
pub neon: crate::core_arch::aarch64::Neon,
}
impl Neon {
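/// Creates the NEON token without verifying that the `neon` target feature is
/// available.
///
/// # Safety
///
/// The caller must ensure the running CPU supports NEON.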
#[inline]
pub const unsafe fn new_unchecked() -> Self {
Neon {
neon: unsafe { crate::core_arch::aarch64::Neon::new_unchecked() },
}
}
}
impl Seal for Neon {}
impl ArchTypes for Neon {
type f32x4 = crate::support::Aligned128<float32x4_t>;
type i8x16 = crate::support::Aligned128<int8x16_t>;
type u8x16 = crate::support::Aligned128<uint8x16_t>;
type mask8x16 = crate::support::Aligned128<int8x16_t>;
type i16x8 = crate::support::Aligned128<int16x8_t>;
type u16x8 = crate::support::Aligned128<uint16x8_t>;
type mask16x8 = crate::support::Aligned128<int16x8_t>;
type i32x4 = crate::support::Aligned128<int32x4_t>;
type u32x4 = crate::support::Aligned128<uint32x4_t>;
type mask32x4 = crate::support::Aligned128<int32x4_t>;
type f64x2 = crate::support::Aligned128<float64x2_t>;
type mask64x2 = crate::support::Aligned128<int64x2_t>;
type f32x8 = crate::support::Aligned256<float32x4x2_t>;
type i8x32 = crate::support::Aligned256<int8x16x2_t>;
type u8x32 = crate::support::Aligned256<uint8x16x2_t>;
type mask8x32 = crate::support::Aligned256<int8x16x2_t>;
type i16x16 = crate::support::Aligned256<int16x8x2_t>;
type u16x16 = crate::support::Aligned256<uint16x8x2_t>;
type mask16x16 = crate::support::Aligned256<int16x8x2_t>;
type i32x8 = crate::support::Aligned256<int32x4x2_t>;
type u32x8 = crate::support::Aligned256<uint32x4x2_t>;
type mask32x8 = crate::support::Aligned256<int32x4x2_t>;
type f64x4 = crate::support::Aligned256<float64x2x2_t>;
type mask64x4 = crate::support::Aligned256<int64x2x2_t>;
type f32x16 = crate::support::Aligned512<float32x4x4_t>;
type i8x64 = crate::support::Aligned512<int8x16x4_t>;
type u8x64 = crate::support::Aligned512<uint8x16x4_t>;
type mask8x64 = crate::support::Aligned512<int8x16x4_t>;
type i16x32 = crate::support::Aligned512<int16x8x4_t>;
type u16x32 = crate::support::Aligned512<uint16x8x4_t>;
type mask16x32 = crate::support::Aligned512<int16x8x4_t>;
type i32x16 = crate::support::Aligned512<int32x4x4_t>;
type u32x16 = crate::support::Aligned512<uint32x4x4_t>;
type mask32x16 = crate::support::Aligned512<int32x4x4_t>;
type f64x8 = crate::support::Aligned512<float64x2x4_t>;
type mask64x8 = crate::support::Aligned512<int64x2x4_t>;
}
impl Simd for Neon {
type f32s = f32x4<Self>;
type f64s = f64x2<Self>;
type u8s = u8x16<Self>;
type i8s = i8x16<Self>;
type u16s = u16x8<Self>;
type i16s = i16x8<Self>;
type u32s = u32x4<Self>;
type i32s = i32x4<Self>;
type mask8s = mask8x16<Self>;
type mask16s = mask16x8<Self>;
type mask32s = mask32x4<Self>;
type mask64s = mask64x2<Self>;
#[inline(always)]
fn level(self) -> Level {
Level::Neon(self)
}
#[inline]
fn vectorize<F: FnOnce() -> R, R>(self, f: F) -> R {
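// Calling through an inner #[target_feature(enable = "neon")] function lets
// the compiler assume NEON is available while compiling the closure body.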
#[target_feature(enable = "neon")]
unsafe fn vectorize_neon<F: FnOnce() -> R, R>(f: F) -> R {
f()
}
unsafe { vectorize_neon(f) }
}
#[inline(always)]
fn splat_f32x4(self, val: f32) -> f32x4<Self> {
unsafe { vdupq_n_f32(val).simd_into(self) }
}
#[inline(always)]
fn load_array_f32x4(self, val: [f32; 4usize]) -> f32x4<Self> {
f32x4 {
val: unsafe { core::mem::transmute_copy(&val) },
simd: self,
}
}
#[inline(always)]
fn load_array_ref_f32x4(self, val: &[f32; 4usize]) -> f32x4<Self> {
f32x4 {
val: unsafe { core::mem::transmute_copy(val) },
simd: self,
}
}
#[inline(always)]
fn as_array_f32x4(self, a: f32x4<Self>) -> [f32; 4usize] {
unsafe { core::mem::transmute::<float32x4_t, [f32; 4usize]>(a.val.0) }
}
#[inline(always)]
fn as_array_ref_f32x4(self, a: &f32x4<Self>) -> &[f32; 4usize] {
unsafe { core::mem::transmute::<&float32x4_t, &[f32; 4usize]>(&a.val.0) }
}
#[inline(always)]
fn as_array_mut_f32x4(self, a: &mut f32x4<Self>) -> &mut [f32; 4usize] {
unsafe { core::mem::transmute::<&mut float32x4_t, &mut [f32; 4usize]>(&mut a.val.0) }
}
#[inline(always)]
fn store_array_f32x4(self, a: f32x4<Self>, dest: &mut [f32; 4usize]) -> () {
unsafe {
core::ptr::copy_nonoverlapping(
(&raw const a.val.0) as *const f32,
dest.as_mut_ptr(),
4usize,
);
}
}
#[inline(always)]
fn cvt_from_bytes_f32x4(self, a: u8x16<Self>) -> f32x4<Self> {
unsafe {
f32x4 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_f32x4(self, a: f32x4<Self>) -> u8x16<Self> {
unsafe {
u8x16 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn slide_f32x4<const SHIFT: usize>(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
if SHIFT >= 4usize {
return b;
}
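// Byte-level extract from the concatenation of `a` and `b`; each f32 lane is
// 4 bytes, hence the SHIFT * 4 byte offset.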
let result = unsafe {
dyn_vext_128(
self.cvt_to_bytes_f32x4(a).val.0,
self.cvt_to_bytes_f32x4(b).val.0,
SHIFT * 4usize,
)
};
self.cvt_from_bytes_f32x4(u8x16 {
val: crate::support::Aligned128(result),
simd: self,
})
}
#[inline(always)]
fn slide_within_blocks_f32x4<const SHIFT: usize>(
self,
a: f32x4<Self>,
b: f32x4<Self>,
) -> f32x4<Self> {
self.slide_f32x4::<SHIFT>(a, b)
}
#[inline(always)]
fn abs_f32x4(self, a: f32x4<Self>) -> f32x4<Self> {
unsafe { vabsq_f32(a.into()).simd_into(self) }
}
#[inline(always)]
fn neg_f32x4(self, a: f32x4<Self>) -> f32x4<Self> {
unsafe { vnegq_f32(a.into()).simd_into(self) }
}
#[inline(always)]
fn sqrt_f32x4(self, a: f32x4<Self>) -> f32x4<Self> {
unsafe { vsqrtq_f32(a.into()).simd_into(self) }
}
#[inline(always)]
fn add_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
unsafe { vaddq_f32(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn sub_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
unsafe { vsubq_f32(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn mul_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
unsafe { vmulq_f32(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn div_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
unsafe { vdivq_f32(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn copysign_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
unsafe {
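// Take only the sign bit (bit 31) of each lane from `b`; all remaining bits
// come from `a`.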
let sign_mask = vdupq_n_u32(1 << 31);
vbslq_f32(sign_mask, b.into(), a.into()).simd_into(self)
}
}
#[inline(always)]
fn simd_eq_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> mask32x4<Self> {
unsafe { vreinterpretq_s32_u32(vceqq_f32(a.into(), b.into())).simd_into(self) }
}
#[inline(always)]
fn simd_lt_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> mask32x4<Self> {
unsafe { vreinterpretq_s32_u32(vcltq_f32(a.into(), b.into())).simd_into(self) }
}
#[inline(always)]
fn simd_le_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> mask32x4<Self> {
unsafe { vreinterpretq_s32_u32(vcleq_f32(a.into(), b.into())).simd_into(self) }
}
#[inline(always)]
fn simd_ge_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> mask32x4<Self> {
unsafe { vreinterpretq_s32_u32(vcgeq_f32(a.into(), b.into())).simd_into(self) }
}
#[inline(always)]
fn simd_gt_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> mask32x4<Self> {
unsafe { vreinterpretq_s32_u32(vcgtq_f32(a.into(), b.into())).simd_into(self) }
}
#[inline(always)]
fn zip_low_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
let x = a.into();
let y = b.into();
unsafe { vzip1q_f32(x, y).simd_into(self) }
}
#[inline(always)]
fn zip_high_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
let x = a.into();
let y = b.into();
unsafe { vzip2q_f32(x, y).simd_into(self) }
}
#[inline(always)]
fn unzip_low_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
let x = a.into();
let y = b.into();
unsafe { vuzp1q_f32(x, y).simd_into(self) }
}
#[inline(always)]
fn unzip_high_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
let x = a.into();
let y = b.into();
unsafe { vuzp2q_f32(x, y).simd_into(self) }
}
#[inline(always)]
fn max_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
unsafe { vmaxq_f32(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn min_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
unsafe { vminq_f32(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn max_precise_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
unsafe { vmaxnmq_f32(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn min_precise_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x4<Self> {
unsafe { vminnmq_f32(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn mul_add_f32x4(self, a: f32x4<Self>, b: f32x4<Self>, c: f32x4<Self>) -> f32x4<Self> {
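// `vfmaq_f32(acc, x, y)` computes `acc + x * y`, so `a * b + c` maps to
// `vfmaq_f32(c, b, a)`.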
unsafe { vfmaq_f32(c.into(), b.into(), a.into()).simd_into(self) }
}
#[inline(always)]
fn mul_sub_f32x4(self, a: f32x4<Self>, b: f32x4<Self>, c: f32x4<Self>) -> f32x4<Self> {
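// `vfmsq_f32(c, b, a)` computes `c - a * b`; negating the result yields
// `a * b - c`.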
unsafe { vnegq_f32(vfmsq_f32(c.into(), b.into(), a.into())).simd_into(self) }
}
#[inline(always)]
fn floor_f32x4(self, a: f32x4<Self>) -> f32x4<Self> {
unsafe { vrndmq_f32(a.into()).simd_into(self) }
}
#[inline(always)]
fn ceil_f32x4(self, a: f32x4<Self>) -> f32x4<Self> {
unsafe { vrndpq_f32(a.into()).simd_into(self) }
}
#[inline(always)]
fn round_ties_even_f32x4(self, a: f32x4<Self>) -> f32x4<Self> {
unsafe { vrndnq_f32(a.into()).simd_into(self) }
}
#[inline(always)]
fn fract_f32x4(self, a: f32x4<Self>) -> f32x4<Self> {
unsafe {
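// fract(a) = a - trunc(a); the f32 -> i32 -> f32 round trip truncates toward
// zero. Note that the conversion saturates for values outside the i32 range.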
let c1 = vcvtq_s32_f32(a.into());
let c2 = vcvtq_f32_s32(c1);
vsubq_f32(a.into(), c2).simd_into(self)
}
}
#[inline(always)]
fn trunc_f32x4(self, a: f32x4<Self>) -> f32x4<Self> {
unsafe { vrndq_f32(a.into()).simd_into(self) }
}
#[inline(always)]
fn select_f32x4(self, a: mask32x4<Self>, b: f32x4<Self>, c: f32x4<Self>) -> f32x4<Self> {
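// `vbslq` takes bits from `b` where the mask bit is 1 and from `c` otherwise.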
unsafe { vbslq_f32(vreinterpretq_u32_s32(a.into()), b.into(), c.into()).simd_into(self) }
}
#[inline(always)]
fn combine_f32x4(self, a: f32x4<Self>, b: f32x4<Self>) -> f32x8<Self> {
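// A 256-bit vector is modeled on NEON as a pair of 128-bit registers.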
f32x8 {
val: crate::support::Aligned256(float32x4x2_t(a.val.0, b.val.0)),
simd: self,
}
}
#[inline(always)]
fn reinterpret_f64_f32x4(self, a: f32x4<Self>) -> f64x2<Self> {
unsafe { vreinterpretq_f64_f32(a.into()).simd_into(self) }
}
#[inline(always)]
fn reinterpret_i32_f32x4(self, a: f32x4<Self>) -> i32x4<Self> {
unsafe { vreinterpretq_s32_f32(a.into()).simd_into(self) }
}
#[inline(always)]
fn reinterpret_u8_f32x4(self, a: f32x4<Self>) -> u8x16<Self> {
unsafe { vreinterpretq_u8_f32(a.into()).simd_into(self) }
}
#[inline(always)]
fn reinterpret_u32_f32x4(self, a: f32x4<Self>) -> u32x4<Self> {
unsafe { vreinterpretq_u32_f32(a.into()).simd_into(self) }
}
#[inline(always)]
fn cvt_u32_f32x4(self, a: f32x4<Self>) -> u32x4<Self> {
unsafe { vcvtq_u32_f32(a.into()).simd_into(self) }
}
#[inline(always)]
fn cvt_u32_precise_f32x4(self, a: f32x4<Self>) -> u32x4<Self> {
self.cvt_u32_f32x4(a)
}
#[inline(always)]
fn cvt_i32_f32x4(self, a: f32x4<Self>) -> i32x4<Self> {
unsafe { vcvtq_s32_f32(a.into()).simd_into(self) }
}
#[inline(always)]
fn cvt_i32_precise_f32x4(self, a: f32x4<Self>) -> i32x4<Self> {
self.cvt_i32_f32x4(a)
}
#[inline(always)]
fn splat_i8x16(self, val: i8) -> i8x16<Self> {
unsafe { vdupq_n_s8(val).simd_into(self) }
}
#[inline(always)]
fn load_array_i8x16(self, val: [i8; 16usize]) -> i8x16<Self> {
i8x16 {
val: unsafe { core::mem::transmute_copy(&val) },
simd: self,
}
}
#[inline(always)]
fn load_array_ref_i8x16(self, val: &[i8; 16usize]) -> i8x16<Self> {
i8x16 {
val: unsafe { core::mem::transmute_copy(val) },
simd: self,
}
}
#[inline(always)]
fn as_array_i8x16(self, a: i8x16<Self>) -> [i8; 16usize] {
unsafe { core::mem::transmute::<int8x16_t, [i8; 16usize]>(a.val.0) }
}
#[inline(always)]
fn as_array_ref_i8x16(self, a: &i8x16<Self>) -> &[i8; 16usize] {
unsafe { core::mem::transmute::<&int8x16_t, &[i8; 16usize]>(&a.val.0) }
}
#[inline(always)]
fn as_array_mut_i8x16(self, a: &mut i8x16<Self>) -> &mut [i8; 16usize] {
unsafe { core::mem::transmute::<&mut int8x16_t, &mut [i8; 16usize]>(&mut a.val.0) }
}
#[inline(always)]
fn store_array_i8x16(self, a: i8x16<Self>, dest: &mut [i8; 16usize]) -> () {
unsafe {
core::ptr::copy_nonoverlapping(
(&raw const a.val.0) as *const i8,
dest.as_mut_ptr(),
16usize,
);
}
}
#[inline(always)]
fn cvt_from_bytes_i8x16(self, a: u8x16<Self>) -> i8x16<Self> {
unsafe {
i8x16 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_i8x16(self, a: i8x16<Self>) -> u8x16<Self> {
unsafe {
u8x16 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn slide_i8x16<const SHIFT: usize>(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
if SHIFT >= 16usize {
return b;
}
let result = unsafe {
dyn_vext_128(
self.cvt_to_bytes_i8x16(a).val.0,
self.cvt_to_bytes_i8x16(b).val.0,
SHIFT,
)
};
self.cvt_from_bytes_i8x16(u8x16 {
val: crate::support::Aligned128(result),
simd: self,
})
}
#[inline(always)]
fn slide_within_blocks_i8x16<const SHIFT: usize>(
self,
a: i8x16<Self>,
b: i8x16<Self>,
) -> i8x16<Self> {
self.slide_i8x16::<SHIFT>(a, b)
}
#[inline(always)]
fn add_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
unsafe { vaddq_s8(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn sub_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
unsafe { vsubq_s8(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn mul_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
unsafe { vmulq_s8(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn and_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
unsafe { vandq_s8(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn or_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
unsafe { vorrq_s8(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn xor_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
unsafe { veorq_s8(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn not_i8x16(self, a: i8x16<Self>) -> i8x16<Self> {
unsafe { vmvnq_s8(a.into()).simd_into(self) }
}
#[inline(always)]
fn shl_i8x16(self, a: i8x16<Self>, shift: u32) -> i8x16<Self> {
unsafe { vshlq_s8(a.into(), vdupq_n_s8(shift as i8)).simd_into(self) }
}
#[inline(always)]
fn shlv_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
unsafe { vshlq_s8(a.into(), b.into()).simd_into(self) }
}
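// NEON's variable shift takes a signed per-lane count: positive shifts left,
// negative shifts right, which is how the right-shift variants below work.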
#[inline(always)]
fn shr_i8x16(self, a: i8x16<Self>, shift: u32) -> i8x16<Self> {
unsafe { vshlq_s8(a.into(), vdupq_n_s8(-(shift as i8))).simd_into(self) }
}
#[inline(always)]
fn shrv_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
unsafe { vshlq_s8(a.into(), vnegq_s8(b.into())).simd_into(self) }
}
#[inline(always)]
fn simd_eq_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> mask8x16<Self> {
unsafe { vreinterpretq_s8_u8(vceqq_s8(a.into(), b.into())).simd_into(self) }
}
#[inline(always)]
fn simd_lt_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> mask8x16<Self> {
unsafe { vreinterpretq_s8_u8(vcltq_s8(a.into(), b.into())).simd_into(self) }
}
#[inline(always)]
fn simd_le_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> mask8x16<Self> {
unsafe { vreinterpretq_s8_u8(vcleq_s8(a.into(), b.into())).simd_into(self) }
}
#[inline(always)]
fn simd_ge_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> mask8x16<Self> {
unsafe { vreinterpretq_s8_u8(vcgeq_s8(a.into(), b.into())).simd_into(self) }
}
#[inline(always)]
fn simd_gt_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> mask8x16<Self> {
unsafe { vreinterpretq_s8_u8(vcgtq_s8(a.into(), b.into())).simd_into(self) }
}
#[inline(always)]
fn zip_low_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
let x = a.into();
let y = b.into();
unsafe { vzip1q_s8(x, y).simd_into(self) }
}
#[inline(always)]
fn zip_high_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
let x = a.into();
let y = b.into();
unsafe { vzip2q_s8(x, y).simd_into(self) }
}
#[inline(always)]
fn unzip_low_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
let x = a.into();
let y = b.into();
unsafe { vuzp1q_s8(x, y).simd_into(self) }
}
#[inline(always)]
fn unzip_high_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
let x = a.into();
let y = b.into();
unsafe { vuzp2q_s8(x, y).simd_into(self) }
}
#[inline(always)]
fn select_i8x16(self, a: mask8x16<Self>, b: i8x16<Self>, c: i8x16<Self>) -> i8x16<Self> {
unsafe { vbslq_s8(vreinterpretq_u8_s8(a.into()), b.into(), c.into()).simd_into(self) }
}
#[inline(always)]
fn min_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
unsafe { vminq_s8(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn max_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x16<Self> {
unsafe { vmaxq_s8(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn combine_i8x16(self, a: i8x16<Self>, b: i8x16<Self>) -> i8x32<Self> {
i8x32 {
val: crate::support::Aligned256(int8x16x2_t(a.val.0, b.val.0)),
simd: self,
}
}
#[inline(always)]
fn neg_i8x16(self, a: i8x16<Self>) -> i8x16<Self> {
unsafe { vnegq_s8(a.into()).simd_into(self) }
}
#[inline(always)]
fn reinterpret_u8_i8x16(self, a: i8x16<Self>) -> u8x16<Self> {
unsafe { vreinterpretq_u8_s8(a.into()).simd_into(self) }
}
#[inline(always)]
fn reinterpret_u32_i8x16(self, a: i8x16<Self>) -> u32x4<Self> {
unsafe { vreinterpretq_u32_s8(a.into()).simd_into(self) }
}
#[inline(always)]
fn splat_u8x16(self, val: u8) -> u8x16<Self> {
unsafe { vdupq_n_u8(val).simd_into(self) }
}
#[inline(always)]
fn load_array_u8x16(self, val: [u8; 16usize]) -> u8x16<Self> {
u8x16 {
val: unsafe { core::mem::transmute_copy(&val) },
simd: self,
}
}
#[inline(always)]
fn load_array_ref_u8x16(self, val: &[u8; 16usize]) -> u8x16<Self> {
u8x16 {
val: unsafe { core::mem::transmute_copy(val) },
simd: self,
}
}
#[inline(always)]
fn as_array_u8x16(self, a: u8x16<Self>) -> [u8; 16usize] {
unsafe { core::mem::transmute::<uint8x16_t, [u8; 16usize]>(a.val.0) }
}
#[inline(always)]
fn as_array_ref_u8x16(self, a: &u8x16<Self>) -> &[u8; 16usize] {
unsafe { core::mem::transmute::<&uint8x16_t, &[u8; 16usize]>(&a.val.0) }
}
#[inline(always)]
fn as_array_mut_u8x16(self, a: &mut u8x16<Self>) -> &mut [u8; 16usize] {
unsafe { core::mem::transmute::<&mut uint8x16_t, &mut [u8; 16usize]>(&mut a.val.0) }
}
#[inline(always)]
fn store_array_u8x16(self, a: u8x16<Self>, dest: &mut [u8; 16usize]) -> () {
unsafe {
core::ptr::copy_nonoverlapping(
(&raw const a.val.0) as *const u8,
dest.as_mut_ptr(),
16usize,
);
}
}
#[inline(always)]
fn cvt_from_bytes_u8x16(self, a: u8x16<Self>) -> u8x16<Self> {
unsafe {
u8x16 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_u8x16(self, a: u8x16<Self>) -> u8x16<Self> {
unsafe {
u8x16 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn slide_u8x16<const SHIFT: usize>(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
if SHIFT >= 16usize {
return b;
}
let result = unsafe {
dyn_vext_128(
self.cvt_to_bytes_u8x16(a).val.0,
self.cvt_to_bytes_u8x16(b).val.0,
SHIFT,
)
};
self.cvt_from_bytes_u8x16(u8x16 {
val: crate::support::Aligned128(result),
simd: self,
})
}
#[inline(always)]
fn slide_within_blocks_u8x16<const SHIFT: usize>(
self,
a: u8x16<Self>,
b: u8x16<Self>,
) -> u8x16<Self> {
self.slide_u8x16::<SHIFT>(a, b)
}
#[inline(always)]
fn add_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
unsafe { vaddq_u8(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn sub_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
unsafe { vsubq_u8(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn mul_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
unsafe { vmulq_u8(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn and_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
unsafe { vandq_u8(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn or_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
unsafe { vorrq_u8(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn xor_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
unsafe { veorq_u8(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn not_u8x16(self, a: u8x16<Self>) -> u8x16<Self> {
unsafe { vmvnq_u8(a.into()).simd_into(self) }
}
#[inline(always)]
fn shl_u8x16(self, a: u8x16<Self>, shift: u32) -> u8x16<Self> {
unsafe { vshlq_u8(a.into(), vdupq_n_s8(shift as i8)).simd_into(self) }
}
#[inline(always)]
fn shlv_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
unsafe { vshlq_u8(a.into(), vreinterpretq_s8_u8(b.into())).simd_into(self) }
}
#[inline(always)]
fn shr_u8x16(self, a: u8x16<Self>, shift: u32) -> u8x16<Self> {
unsafe { vshlq_u8(a.into(), vdupq_n_s8(-(shift as i8))).simd_into(self) }
}
#[inline(always)]
fn shrv_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
unsafe { vshlq_u8(a.into(), vnegq_s8(vreinterpretq_s8_u8(b.into()))).simd_into(self) }
}
#[inline(always)]
fn simd_eq_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> mask8x16<Self> {
unsafe { vreinterpretq_s8_u8(vceqq_u8(a.into(), b.into())).simd_into(self) }
}
#[inline(always)]
fn simd_lt_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> mask8x16<Self> {
unsafe { vreinterpretq_s8_u8(vcltq_u8(a.into(), b.into())).simd_into(self) }
}
#[inline(always)]
fn simd_le_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> mask8x16<Self> {
unsafe { vreinterpretq_s8_u8(vcleq_u8(a.into(), b.into())).simd_into(self) }
}
#[inline(always)]
fn simd_ge_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> mask8x16<Self> {
unsafe { vreinterpretq_s8_u8(vcgeq_u8(a.into(), b.into())).simd_into(self) }
}
#[inline(always)]
fn simd_gt_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> mask8x16<Self> {
unsafe { vreinterpretq_s8_u8(vcgtq_u8(a.into(), b.into())).simd_into(self) }
}
#[inline(always)]
fn zip_low_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
let x = a.into();
let y = b.into();
unsafe { vzip1q_u8(x, y).simd_into(self) }
}
#[inline(always)]
fn zip_high_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
let x = a.into();
let y = b.into();
unsafe { vzip2q_u8(x, y).simd_into(self) }
}
#[inline(always)]
fn unzip_low_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
let x = a.into();
let y = b.into();
unsafe { vuzp1q_u8(x, y).simd_into(self) }
}
#[inline(always)]
fn unzip_high_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
let x = a.into();
let y = b.into();
unsafe { vuzp2q_u8(x, y).simd_into(self) }
}
#[inline(always)]
fn select_u8x16(self, a: mask8x16<Self>, b: u8x16<Self>, c: u8x16<Self>) -> u8x16<Self> {
unsafe { vbslq_u8(vreinterpretq_u8_s8(a.into()), b.into(), c.into()).simd_into(self) }
}
#[inline(always)]
fn min_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
unsafe { vminq_u8(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn max_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x16<Self> {
unsafe { vmaxq_u8(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn combine_u8x16(self, a: u8x16<Self>, b: u8x16<Self>) -> u8x32<Self> {
u8x32 {
val: crate::support::Aligned256(uint8x16x2_t(a.val.0, b.val.0)),
simd: self,
}
}
#[inline(always)]
fn widen_u8x16(self, a: u8x16<Self>) -> u16x16<Self> {
unsafe {
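// Widen each half separately: `vmovl_u8` zero-extends eight u8 lanes to u16.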
let low = vmovl_u8(vget_low_u8(a.into()));
let high = vmovl_u8(vget_high_u8(a.into()));
uint16x8x2_t(low, high).simd_into(self)
}
}
#[inline(always)]
fn reinterpret_u32_u8x16(self, a: u8x16<Self>) -> u32x4<Self> {
unsafe { vreinterpretq_u32_u8(a.into()).simd_into(self) }
}
#[inline(always)]
fn splat_mask8x16(self, val: i8) -> mask8x16<Self> {
unsafe { vdupq_n_s8(val).simd_into(self) }
}
#[inline(always)]
fn load_array_mask8x16(self, val: [i8; 16usize]) -> mask8x16<Self> {
mask8x16 {
val: unsafe { core::mem::transmute_copy(&val) },
simd: self,
}
}
#[inline(always)]
fn load_array_ref_mask8x16(self, val: &[i8; 16usize]) -> mask8x16<Self> {
mask8x16 {
val: unsafe { core::mem::transmute_copy(val) },
simd: self,
}
}
#[inline(always)]
fn as_array_mask8x16(self, a: mask8x16<Self>) -> [i8; 16usize] {
unsafe { core::mem::transmute::<int8x16_t, [i8; 16usize]>(a.val.0) }
}
#[inline(always)]
fn as_array_ref_mask8x16(self, a: &mask8x16<Self>) -> &[i8; 16usize] {
unsafe { core::mem::transmute::<&int8x16_t, &[i8; 16usize]>(&a.val.0) }
}
#[inline(always)]
fn as_array_mut_mask8x16(self, a: &mut mask8x16<Self>) -> &mut [i8; 16usize] {
unsafe { core::mem::transmute::<&mut int8x16_t, &mut [i8; 16usize]>(&mut a.val.0) }
}
#[inline(always)]
fn store_array_mask8x16(self, a: mask8x16<Self>, dest: &mut [i8; 16usize]) -> () {
unsafe {
core::ptr::copy_nonoverlapping(
(&raw const a.val.0) as *const i8,
dest.as_mut_ptr(),
16usize,
);
}
}
#[inline(always)]
fn cvt_from_bytes_mask8x16(self, a: u8x16<Self>) -> mask8x16<Self> {
unsafe {
mask8x16 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_mask8x16(self, a: mask8x16<Self>) -> u8x16<Self> {
unsafe {
u8x16 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn slide_mask8x16<const SHIFT: usize>(
self,
a: mask8x16<Self>,
b: mask8x16<Self>,
) -> mask8x16<Self> {
if SHIFT >= 16usize {
return b;
}
let result = unsafe {
dyn_vext_128(
self.cvt_to_bytes_mask8x16(a).val.0,
self.cvt_to_bytes_mask8x16(b).val.0,
SHIFT,
)
};
self.cvt_from_bytes_mask8x16(u8x16 {
val: crate::support::Aligned128(result),
simd: self,
})
}
#[inline(always)]
fn slide_within_blocks_mask8x16<const SHIFT: usize>(
self,
a: mask8x16<Self>,
b: mask8x16<Self>,
) -> mask8x16<Self> {
self.slide_mask8x16::<SHIFT>(a, b)
}
#[inline(always)]
fn and_mask8x16(self, a: mask8x16<Self>, b: mask8x16<Self>) -> mask8x16<Self> {
unsafe { vandq_s8(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn or_mask8x16(self, a: mask8x16<Self>, b: mask8x16<Self>) -> mask8x16<Self> {
unsafe { vorrq_s8(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn xor_mask8x16(self, a: mask8x16<Self>, b: mask8x16<Self>) -> mask8x16<Self> {
unsafe { veorq_s8(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn not_mask8x16(self, a: mask8x16<Self>) -> mask8x16<Self> {
unsafe { vmvnq_s8(a.into()).simd_into(self) }
}
#[inline(always)]
fn select_mask8x16(
self,
a: mask8x16<Self>,
b: mask8x16<Self>,
c: mask8x16<Self>,
) -> mask8x16<Self> {
unsafe { vbslq_s8(vreinterpretq_u8_s8(a.into()), b.into(), c.into()).simd_into(self) }
}
#[inline(always)]
fn simd_eq_mask8x16(self, a: mask8x16<Self>, b: mask8x16<Self>) -> mask8x16<Self> {
unsafe { vreinterpretq_s8_u8(vceqq_s8(a.into(), b.into())).simd_into(self) }
}
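// Mask lanes are canonically all-zeros or all-ones, so reducing the vector as
// four u32 lanes with max/min is a valid any/all test for every lane width.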
#[inline(always)]
fn any_true_mask8x16(self, a: mask8x16<Self>) -> bool {
unsafe { vmaxvq_u32(vreinterpretq_u32_s8(a.into())) != 0 }
}
#[inline(always)]
fn all_true_mask8x16(self, a: mask8x16<Self>) -> bool {
unsafe { vminvq_u32(vreinterpretq_u32_s8(a.into())) == 0xffffffff }
}
#[inline(always)]
fn any_false_mask8x16(self, a: mask8x16<Self>) -> bool {
unsafe { vminvq_u32(vreinterpretq_u32_s8(a.into())) != 0xffffffff }
}
#[inline(always)]
fn all_false_mask8x16(self, a: mask8x16<Self>) -> bool {
unsafe { vmaxvq_u32(vreinterpretq_u32_s8(a.into())) == 0 }
}
#[inline(always)]
fn combine_mask8x16(self, a: mask8x16<Self>, b: mask8x16<Self>) -> mask8x32<Self> {
mask8x32 {
val: crate::support::Aligned256(int8x16x2_t(a.val.0, b.val.0)),
simd: self,
}
}
#[inline(always)]
fn splat_i16x8(self, val: i16) -> i16x8<Self> {
unsafe { vdupq_n_s16(val).simd_into(self) }
}
#[inline(always)]
fn load_array_i16x8(self, val: [i16; 8usize]) -> i16x8<Self> {
i16x8 {
val: unsafe { core::mem::transmute_copy(&val) },
simd: self,
}
}
#[inline(always)]
fn load_array_ref_i16x8(self, val: &[i16; 8usize]) -> i16x8<Self> {
i16x8 {
val: unsafe { core::mem::transmute_copy(val) },
simd: self,
}
}
#[inline(always)]
fn as_array_i16x8(self, a: i16x8<Self>) -> [i16; 8usize] {
unsafe { core::mem::transmute::<int16x8_t, [i16; 8usize]>(a.val.0) }
}
#[inline(always)]
fn as_array_ref_i16x8(self, a: &i16x8<Self>) -> &[i16; 8usize] {
unsafe { core::mem::transmute::<&int16x8_t, &[i16; 8usize]>(&a.val.0) }
}
#[inline(always)]
fn as_array_mut_i16x8(self, a: &mut i16x8<Self>) -> &mut [i16; 8usize] {
unsafe { core::mem::transmute::<&mut int16x8_t, &mut [i16; 8usize]>(&mut a.val.0) }
}
#[inline(always)]
fn store_array_i16x8(self, a: i16x8<Self>, dest: &mut [i16; 8usize]) -> () {
unsafe {
core::ptr::copy_nonoverlapping(
(&raw const a.val.0) as *const i16,
dest.as_mut_ptr(),
8usize,
);
}
}
#[inline(always)]
fn cvt_from_bytes_i16x8(self, a: u8x16<Self>) -> i16x8<Self> {
unsafe {
i16x8 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_i16x8(self, a: i16x8<Self>) -> u8x16<Self> {
unsafe {
u8x16 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn slide_i16x8<const SHIFT: usize>(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
if SHIFT >= 8usize {
return b;
}
let result = unsafe {
dyn_vext_128(
self.cvt_to_bytes_i16x8(a).val.0,
self.cvt_to_bytes_i16x8(b).val.0,
SHIFT * 2usize,
)
};
self.cvt_from_bytes_i16x8(u8x16 {
val: crate::support::Aligned128(result),
simd: self,
})
}
#[inline(always)]
fn slide_within_blocks_i16x8<const SHIFT: usize>(
self,
a: i16x8<Self>,
b: i16x8<Self>,
) -> i16x8<Self> {
self.slide_i16x8::<SHIFT>(a, b)
}
#[inline(always)]
fn add_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
unsafe { vaddq_s16(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn sub_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
unsafe { vsubq_s16(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn mul_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
unsafe { vmulq_s16(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn and_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
unsafe { vandq_s16(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn or_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
unsafe { vorrq_s16(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn xor_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
unsafe { veorq_s16(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn not_i16x8(self, a: i16x8<Self>) -> i16x8<Self> {
unsafe { vmvnq_s16(a.into()).simd_into(self) }
}
#[inline(always)]
fn shl_i16x8(self, a: i16x8<Self>, shift: u32) -> i16x8<Self> {
unsafe { vshlq_s16(a.into(), vdupq_n_s16(shift as i16)).simd_into(self) }
}
#[inline(always)]
fn shlv_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
unsafe { vshlq_s16(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn shr_i16x8(self, a: i16x8<Self>, shift: u32) -> i16x8<Self> {
unsafe { vshlq_s16(a.into(), vdupq_n_s16(-(shift as i16))).simd_into(self) }
}
#[inline(always)]
fn shrv_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
unsafe { vshlq_s16(a.into(), vnegq_s16(b.into())).simd_into(self) }
}
#[inline(always)]
fn simd_eq_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> mask16x8<Self> {
unsafe { vreinterpretq_s16_u16(vceqq_s16(a.into(), b.into())).simd_into(self) }
}
#[inline(always)]
fn simd_lt_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> mask16x8<Self> {
unsafe { vreinterpretq_s16_u16(vcltq_s16(a.into(), b.into())).simd_into(self) }
}
#[inline(always)]
fn simd_le_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> mask16x8<Self> {
unsafe { vreinterpretq_s16_u16(vcleq_s16(a.into(), b.into())).simd_into(self) }
}
#[inline(always)]
fn simd_ge_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> mask16x8<Self> {
unsafe { vreinterpretq_s16_u16(vcgeq_s16(a.into(), b.into())).simd_into(self) }
}
#[inline(always)]
fn simd_gt_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> mask16x8<Self> {
unsafe { vreinterpretq_s16_u16(vcgtq_s16(a.into(), b.into())).simd_into(self) }
}
#[inline(always)]
fn zip_low_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
let x = a.into();
let y = b.into();
unsafe { vzip1q_s16(x, y).simd_into(self) }
}
#[inline(always)]
fn zip_high_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
let x = a.into();
let y = b.into();
unsafe { vzip2q_s16(x, y).simd_into(self) }
}
#[inline(always)]
fn unzip_low_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
let x = a.into();
let y = b.into();
unsafe { vuzp1q_s16(x, y).simd_into(self) }
}
#[inline(always)]
fn unzip_high_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
let x = a.into();
let y = b.into();
unsafe { vuzp2q_s16(x, y).simd_into(self) }
}
#[inline(always)]
fn select_i16x8(self, a: mask16x8<Self>, b: i16x8<Self>, c: i16x8<Self>) -> i16x8<Self> {
unsafe { vbslq_s16(vreinterpretq_u16_s16(a.into()), b.into(), c.into()).simd_into(self) }
}
#[inline(always)]
fn min_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
unsafe { vminq_s16(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn max_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x8<Self> {
unsafe { vmaxq_s16(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn combine_i16x8(self, a: i16x8<Self>, b: i16x8<Self>) -> i16x16<Self> {
i16x16 {
val: crate::support::Aligned256(int16x8x2_t(a.val.0, b.val.0)),
simd: self,
}
}
#[inline(always)]
fn neg_i16x8(self, a: i16x8<Self>) -> i16x8<Self> {
unsafe { vnegq_s16(a.into()).simd_into(self) }
}
#[inline(always)]
fn reinterpret_u8_i16x8(self, a: i16x8<Self>) -> u8x16<Self> {
unsafe { vreinterpretq_u8_s16(a.into()).simd_into(self) }
}
#[inline(always)]
fn reinterpret_u32_i16x8(self, a: i16x8<Self>) -> u32x4<Self> {
unsafe { vreinterpretq_u32_s16(a.into()).simd_into(self) }
}
#[inline(always)]
fn splat_u16x8(self, val: u16) -> u16x8<Self> {
unsafe { vdupq_n_u16(val).simd_into(self) }
}
#[inline(always)]
fn load_array_u16x8(self, val: [u16; 8usize]) -> u16x8<Self> {
u16x8 {
val: unsafe { core::mem::transmute_copy(&val) },
simd: self,
}
}
#[inline(always)]
fn load_array_ref_u16x8(self, val: &[u16; 8usize]) -> u16x8<Self> {
u16x8 {
val: unsafe { core::mem::transmute_copy(val) },
simd: self,
}
}
#[inline(always)]
fn as_array_u16x8(self, a: u16x8<Self>) -> [u16; 8usize] {
unsafe { core::mem::transmute::<uint16x8_t, [u16; 8usize]>(a.val.0) }
}
#[inline(always)]
fn as_array_ref_u16x8(self, a: &u16x8<Self>) -> &[u16; 8usize] {
unsafe { core::mem::transmute::<&uint16x8_t, &[u16; 8usize]>(&a.val.0) }
}
#[inline(always)]
fn as_array_mut_u16x8(self, a: &mut u16x8<Self>) -> &mut [u16; 8usize] {
unsafe { core::mem::transmute::<&mut uint16x8_t, &mut [u16; 8usize]>(&mut a.val.0) }
}
#[inline(always)]
fn store_array_u16x8(self, a: u16x8<Self>, dest: &mut [u16; 8usize]) -> () {
unsafe {
core::ptr::copy_nonoverlapping(
(&raw const a.val.0) as *const u16,
dest.as_mut_ptr(),
8usize,
);
}
}
#[inline(always)]
fn cvt_from_bytes_u16x8(self, a: u8x16<Self>) -> u16x8<Self> {
unsafe {
u16x8 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_u16x8(self, a: u16x8<Self>) -> u8x16<Self> {
unsafe {
u8x16 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn slide_u16x8<const SHIFT: usize>(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
if SHIFT >= 8usize {
return b;
}
let result = unsafe {
dyn_vext_128(
self.cvt_to_bytes_u16x8(a).val.0,
self.cvt_to_bytes_u16x8(b).val.0,
SHIFT * 2usize,
)
};
self.cvt_from_bytes_u16x8(u8x16 {
val: crate::support::Aligned128(result),
simd: self,
})
}
#[inline(always)]
fn slide_within_blocks_u16x8<const SHIFT: usize>(
self,
a: u16x8<Self>,
b: u16x8<Self>,
) -> u16x8<Self> {
self.slide_u16x8::<SHIFT>(a, b)
}
#[inline(always)]
fn add_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
unsafe { vaddq_u16(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn sub_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
unsafe { vsubq_u16(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn mul_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
unsafe { vmulq_u16(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn and_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
unsafe { vandq_u16(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn or_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
unsafe { vorrq_u16(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn xor_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
unsafe { veorq_u16(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn not_u16x8(self, a: u16x8<Self>) -> u16x8<Self> {
unsafe { vmvnq_u16(a.into()).simd_into(self) }
}
#[inline(always)]
fn shl_u16x8(self, a: u16x8<Self>, shift: u32) -> u16x8<Self> {
unsafe { vshlq_u16(a.into(), vdupq_n_s16(shift as i16)).simd_into(self) }
}
#[inline(always)]
fn shlv_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
unsafe { vshlq_u16(a.into(), vreinterpretq_s16_u16(b.into())).simd_into(self) }
}
#[inline(always)]
fn shr_u16x8(self, a: u16x8<Self>, shift: u32) -> u16x8<Self> {
unsafe { vshlq_u16(a.into(), vdupq_n_s16(-(shift as i16))).simd_into(self) }
}
#[inline(always)]
fn shrv_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
unsafe { vshlq_u16(a.into(), vnegq_s16(vreinterpretq_s16_u16(b.into()))).simd_into(self) }
}
#[inline(always)]
fn simd_eq_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> mask16x8<Self> {
unsafe { vreinterpretq_s16_u16(vceqq_u16(a.into(), b.into())).simd_into(self) }
}
#[inline(always)]
fn simd_lt_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> mask16x8<Self> {
unsafe { vreinterpretq_s16_u16(vcltq_u16(a.into(), b.into())).simd_into(self) }
}
#[inline(always)]
fn simd_le_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> mask16x8<Self> {
unsafe { vreinterpretq_s16_u16(vcleq_u16(a.into(), b.into())).simd_into(self) }
}
#[inline(always)]
fn simd_ge_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> mask16x8<Self> {
unsafe { vreinterpretq_s16_u16(vcgeq_u16(a.into(), b.into())).simd_into(self) }
}
#[inline(always)]
fn simd_gt_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> mask16x8<Self> {
unsafe { vreinterpretq_s16_u16(vcgtq_u16(a.into(), b.into())).simd_into(self) }
}
#[inline(always)]
fn zip_low_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
let x = a.into();
let y = b.into();
unsafe { vzip1q_u16(x, y).simd_into(self) }
}
#[inline(always)]
fn zip_high_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
let x = a.into();
let y = b.into();
unsafe { vzip2q_u16(x, y).simd_into(self) }
}
#[inline(always)]
fn unzip_low_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
let x = a.into();
let y = b.into();
unsafe { vuzp1q_u16(x, y).simd_into(self) }
}
#[inline(always)]
fn unzip_high_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
let x = a.into();
let y = b.into();
unsafe { vuzp2q_u16(x, y).simd_into(self) }
}
#[inline(always)]
fn select_u16x8(self, a: mask16x8<Self>, b: u16x8<Self>, c: u16x8<Self>) -> u16x8<Self> {
unsafe { vbslq_u16(vreinterpretq_u16_s16(a.into()), b.into(), c.into()).simd_into(self) }
}
#[inline(always)]
fn min_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
unsafe { vminq_u16(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn max_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x8<Self> {
unsafe { vmaxq_u16(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn combine_u16x8(self, a: u16x8<Self>, b: u16x8<Self>) -> u16x16<Self> {
u16x16 {
val: crate::support::Aligned256(uint16x8x2_t(a.val.0, b.val.0)),
simd: self,
}
}
#[inline(always)]
fn reinterpret_u8_u16x8(self, a: u16x8<Self>) -> u8x16<Self> {
unsafe { vreinterpretq_u8_u16(a.into()).simd_into(self) }
}
#[inline(always)]
fn reinterpret_u32_u16x8(self, a: u16x8<Self>) -> u32x4<Self> {
unsafe { vreinterpretq_u32_u16(a.into()).simd_into(self) }
}
#[inline(always)]
fn splat_mask16x8(self, val: i16) -> mask16x8<Self> {
unsafe { vdupq_n_s16(val).simd_into(self) }
}
#[inline(always)]
fn load_array_mask16x8(self, val: [i16; 8usize]) -> mask16x8<Self> {
mask16x8 {
val: unsafe { core::mem::transmute_copy(&val) },
simd: self,
}
}
#[inline(always)]
fn load_array_ref_mask16x8(self, val: &[i16; 8usize]) -> mask16x8<Self> {
mask16x8 {
val: unsafe { core::mem::transmute_copy(val) },
simd: self,
}
}
#[inline(always)]
fn as_array_mask16x8(self, a: mask16x8<Self>) -> [i16; 8usize] {
unsafe { core::mem::transmute::<int16x8_t, [i16; 8usize]>(a.val.0) }
}
#[inline(always)]
fn as_array_ref_mask16x8(self, a: &mask16x8<Self>) -> &[i16; 8usize] {
unsafe { core::mem::transmute::<&int16x8_t, &[i16; 8usize]>(&a.val.0) }
}
#[inline(always)]
fn as_array_mut_mask16x8(self, a: &mut mask16x8<Self>) -> &mut [i16; 8usize] {
unsafe { core::mem::transmute::<&mut int16x8_t, &mut [i16; 8usize]>(&mut a.val.0) }
}
#[inline(always)]
fn store_array_mask16x8(self, a: mask16x8<Self>, dest: &mut [i16; 8usize]) -> () {
unsafe {
core::ptr::copy_nonoverlapping(
(&raw const a.val.0) as *const i16,
dest.as_mut_ptr(),
8usize,
);
}
}
#[inline(always)]
fn cvt_from_bytes_mask16x8(self, a: u8x16<Self>) -> mask16x8<Self> {
unsafe {
mask16x8 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_mask16x8(self, a: mask16x8<Self>) -> u8x16<Self> {
unsafe {
u8x16 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn slide_mask16x8<const SHIFT: usize>(
self,
a: mask16x8<Self>,
b: mask16x8<Self>,
) -> mask16x8<Self> {
if SHIFT >= 8usize {
return b;
}
let result = unsafe {
dyn_vext_128(
self.cvt_to_bytes_mask16x8(a).val.0,
self.cvt_to_bytes_mask16x8(b).val.0,
SHIFT * 2usize,
)
};
self.cvt_from_bytes_mask16x8(u8x16 {
val: crate::support::Aligned128(result),
simd: self,
})
}
#[inline(always)]
fn slide_within_blocks_mask16x8<const SHIFT: usize>(
self,
a: mask16x8<Self>,
b: mask16x8<Self>,
) -> mask16x8<Self> {
self.slide_mask16x8::<SHIFT>(a, b)
}
#[inline(always)]
fn and_mask16x8(self, a: mask16x8<Self>, b: mask16x8<Self>) -> mask16x8<Self> {
unsafe { vandq_s16(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn or_mask16x8(self, a: mask16x8<Self>, b: mask16x8<Self>) -> mask16x8<Self> {
unsafe { vorrq_s16(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn xor_mask16x8(self, a: mask16x8<Self>, b: mask16x8<Self>) -> mask16x8<Self> {
unsafe { veorq_s16(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn not_mask16x8(self, a: mask16x8<Self>) -> mask16x8<Self> {
unsafe { vmvnq_s16(a.into()).simd_into(self) }
}
#[inline(always)]
fn select_mask16x8(
self,
a: mask16x8<Self>,
b: mask16x8<Self>,
c: mask16x8<Self>,
) -> mask16x8<Self> {
unsafe { vbslq_s16(vreinterpretq_u16_s16(a.into()), b.into(), c.into()).simd_into(self) }
}
#[inline(always)]
fn simd_eq_mask16x8(self, a: mask16x8<Self>, b: mask16x8<Self>) -> mask16x8<Self> {
unsafe { vreinterpretq_s16_u16(vceqq_s16(a.into(), b.into())).simd_into(self) }
}
#[inline(always)]
fn any_true_mask16x8(self, a: mask16x8<Self>) -> bool {
unsafe { vmaxvq_u32(vreinterpretq_u32_s16(a.into())) != 0 }
}
#[inline(always)]
fn all_true_mask16x8(self, a: mask16x8<Self>) -> bool {
unsafe { vminvq_u32(vreinterpretq_u32_s16(a.into())) == 0xffffffff }
}
#[inline(always)]
fn any_false_mask16x8(self, a: mask16x8<Self>) -> bool {
unsafe { vminvq_u32(vreinterpretq_u32_s16(a.into())) != 0xffffffff }
}
#[inline(always)]
fn all_false_mask16x8(self, a: mask16x8<Self>) -> bool {
unsafe { vmaxvq_u32(vreinterpretq_u32_s16(a.into())) == 0 }
}
#[inline(always)]
fn combine_mask16x8(self, a: mask16x8<Self>, b: mask16x8<Self>) -> mask16x16<Self> {
mask16x16 {
val: crate::support::Aligned256(int16x8x2_t(a.val.0, b.val.0)),
simd: self,
}
}
#[inline(always)]
fn splat_i32x4(self, val: i32) -> i32x4<Self> {
unsafe { vdupq_n_s32(val).simd_into(self) }
}
#[inline(always)]
fn load_array_i32x4(self, val: [i32; 4usize]) -> i32x4<Self> {
i32x4 {
val: unsafe { core::mem::transmute_copy(&val) },
simd: self,
}
}
#[inline(always)]
fn load_array_ref_i32x4(self, val: &[i32; 4usize]) -> i32x4<Self> {
i32x4 {
val: unsafe { core::mem::transmute_copy(val) },
simd: self,
}
}
#[inline(always)]
fn as_array_i32x4(self, a: i32x4<Self>) -> [i32; 4usize] {
unsafe { core::mem::transmute::<int32x4_t, [i32; 4usize]>(a.val.0) }
}
#[inline(always)]
fn as_array_ref_i32x4(self, a: &i32x4<Self>) -> &[i32; 4usize] {
unsafe { core::mem::transmute::<&int32x4_t, &[i32; 4usize]>(&a.val.0) }
}
#[inline(always)]
fn as_array_mut_i32x4(self, a: &mut i32x4<Self>) -> &mut [i32; 4usize] {
unsafe { core::mem::transmute::<&mut int32x4_t, &mut [i32; 4usize]>(&mut a.val.0) }
}
#[inline(always)]
fn store_array_i32x4(self, a: i32x4<Self>, dest: &mut [i32; 4usize]) -> () {
unsafe {
core::ptr::copy_nonoverlapping(
(&raw const a.val.0) as *const i32,
dest.as_mut_ptr(),
4usize,
);
}
}
#[inline(always)]
fn cvt_from_bytes_i32x4(self, a: u8x16<Self>) -> i32x4<Self> {
unsafe {
i32x4 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_i32x4(self, a: i32x4<Self>) -> u8x16<Self> {
unsafe {
u8x16 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn slide_i32x4<const SHIFT: usize>(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
if SHIFT >= 4usize {
return b;
}
let result = unsafe {
dyn_vext_128(
self.cvt_to_bytes_i32x4(a).val.0,
self.cvt_to_bytes_i32x4(b).val.0,
SHIFT * 4usize,
)
};
self.cvt_from_bytes_i32x4(u8x16 {
val: crate::support::Aligned128(result),
simd: self,
})
}
#[inline(always)]
fn slide_within_blocks_i32x4<const SHIFT: usize>(
self,
a: i32x4<Self>,
b: i32x4<Self>,
) -> i32x4<Self> {
self.slide_i32x4::<SHIFT>(a, b)
}
#[inline(always)]
fn add_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
unsafe { vaddq_s32(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn sub_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
unsafe { vsubq_s32(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn mul_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
unsafe { vmulq_s32(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn and_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
unsafe { vandq_s32(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn or_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
unsafe { vorrq_s32(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn xor_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
unsafe { veorq_s32(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn not_i32x4(self, a: i32x4<Self>) -> i32x4<Self> {
unsafe { vmvnq_s32(a.into()).simd_into(self) }
}
#[inline(always)]
fn shl_i32x4(self, a: i32x4<Self>, shift: u32) -> i32x4<Self> {
unsafe { vshlq_s32(a.into(), vdupq_n_s32(shift.cast_signed())).simd_into(self) }
}
#[inline(always)]
fn shlv_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
unsafe { vshlq_s32(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn shr_i32x4(self, a: i32x4<Self>, shift: u32) -> i32x4<Self> {
unsafe { vshlq_s32(a.into(), vdupq_n_s32(-shift.cast_signed())).simd_into(self) }
}
#[inline(always)]
fn shrv_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
unsafe { vshlq_s32(a.into(), vnegq_s32(b.into())).simd_into(self) }
}
#[inline(always)]
fn simd_eq_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> mask32x4<Self> {
unsafe { vreinterpretq_s32_u32(vceqq_s32(a.into(), b.into())).simd_into(self) }
}
#[inline(always)]
fn simd_lt_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> mask32x4<Self> {
unsafe { vreinterpretq_s32_u32(vcltq_s32(a.into(), b.into())).simd_into(self) }
}
#[inline(always)]
fn simd_le_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> mask32x4<Self> {
unsafe { vreinterpretq_s32_u32(vcleq_s32(a.into(), b.into())).simd_into(self) }
}
#[inline(always)]
fn simd_ge_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> mask32x4<Self> {
unsafe { vreinterpretq_s32_u32(vcgeq_s32(a.into(), b.into())).simd_into(self) }
}
#[inline(always)]
fn simd_gt_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> mask32x4<Self> {
unsafe { vreinterpretq_s32_u32(vcgtq_s32(a.into(), b.into())).simd_into(self) }
}
#[inline(always)]
fn zip_low_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
let x = a.into();
let y = b.into();
unsafe { vzip1q_s32(x, y).simd_into(self) }
}
#[inline(always)]
fn zip_high_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
let x = a.into();
let y = b.into();
unsafe { vzip2q_s32(x, y).simd_into(self) }
}
#[inline(always)]
fn unzip_low_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
let x = a.into();
let y = b.into();
unsafe { vuzp1q_s32(x, y).simd_into(self) }
}
#[inline(always)]
fn unzip_high_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
let x = a.into();
let y = b.into();
unsafe { vuzp2q_s32(x, y).simd_into(self) }
}
#[inline(always)]
fn select_i32x4(self, a: mask32x4<Self>, b: i32x4<Self>, c: i32x4<Self>) -> i32x4<Self> {
unsafe { vbslq_s32(vreinterpretq_u32_s32(a.into()), b.into(), c.into()).simd_into(self) }
}
#[inline(always)]
fn min_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
unsafe { vminq_s32(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn max_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x4<Self> {
unsafe { vmaxq_s32(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn combine_i32x4(self, a: i32x4<Self>, b: i32x4<Self>) -> i32x8<Self> {
i32x8 {
val: crate::support::Aligned256(int32x4x2_t(a.val.0, b.val.0)),
simd: self,
}
}
#[inline(always)]
fn neg_i32x4(self, a: i32x4<Self>) -> i32x4<Self> {
unsafe { vnegq_s32(a.into()).simd_into(self) }
}
#[inline(always)]
fn reinterpret_u8_i32x4(self, a: i32x4<Self>) -> u8x16<Self> {
unsafe { vreinterpretq_u8_s32(a.into()).simd_into(self) }
}
#[inline(always)]
fn reinterpret_u32_i32x4(self, a: i32x4<Self>) -> u32x4<Self> {
unsafe { vreinterpretq_u32_s32(a.into()).simd_into(self) }
}
#[inline(always)]
fn cvt_f32_i32x4(self, a: i32x4<Self>) -> f32x4<Self> {
unsafe { vcvtq_f32_s32(a.into()).simd_into(self) }
}
#[inline(always)]
fn splat_u32x4(self, val: u32) -> u32x4<Self> {
unsafe { vdupq_n_u32(val).simd_into(self) }
}
#[inline(always)]
fn load_array_u32x4(self, val: [u32; 4usize]) -> u32x4<Self> {
u32x4 {
val: unsafe { core::mem::transmute_copy(&val) },
simd: self,
}
}
#[inline(always)]
fn load_array_ref_u32x4(self, val: &[u32; 4usize]) -> u32x4<Self> {
u32x4 {
val: unsafe { core::mem::transmute_copy(val) },
simd: self,
}
}
#[inline(always)]
fn as_array_u32x4(self, a: u32x4<Self>) -> [u32; 4usize] {
unsafe { core::mem::transmute::<uint32x4_t, [u32; 4usize]>(a.val.0) }
}
#[inline(always)]
fn as_array_ref_u32x4(self, a: &u32x4<Self>) -> &[u32; 4usize] {
unsafe { core::mem::transmute::<&uint32x4_t, &[u32; 4usize]>(&a.val.0) }
}
#[inline(always)]
fn as_array_mut_u32x4(self, a: &mut u32x4<Self>) -> &mut [u32; 4usize] {
unsafe { core::mem::transmute::<&mut uint32x4_t, &mut [u32; 4usize]>(&mut a.val.0) }
}
#[inline(always)]
fn store_array_u32x4(self, a: u32x4<Self>, dest: &mut [u32; 4usize]) -> () {
unsafe {
core::ptr::copy_nonoverlapping(
(&raw const a.val.0) as *const u32,
dest.as_mut_ptr(),
4usize,
);
}
}
#[inline(always)]
fn cvt_from_bytes_u32x4(self, a: u8x16<Self>) -> u32x4<Self> {
unsafe {
u32x4 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_u32x4(self, a: u32x4<Self>) -> u8x16<Self> {
unsafe {
u8x16 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn slide_u32x4<const SHIFT: usize>(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
if SHIFT >= 4usize {
return b;
}
let result = unsafe {
dyn_vext_128(
self.cvt_to_bytes_u32x4(a).val.0,
self.cvt_to_bytes_u32x4(b).val.0,
SHIFT * 4usize,
)
};
self.cvt_from_bytes_u32x4(u8x16 {
val: crate::support::Aligned128(result),
simd: self,
})
}
#[inline(always)]
fn slide_within_blocks_u32x4<const SHIFT: usize>(
self,
a: u32x4<Self>,
b: u32x4<Self>,
) -> u32x4<Self> {
self.slide_u32x4::<SHIFT>(a, b)
}
#[inline(always)]
fn add_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
unsafe { vaddq_u32(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn sub_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
unsafe { vsubq_u32(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn mul_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
unsafe { vmulq_u32(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn and_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
unsafe { vandq_u32(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn or_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
unsafe { vorrq_u32(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn xor_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
unsafe { veorq_u32(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn not_u32x4(self, a: u32x4<Self>) -> u32x4<Self> {
unsafe { vmvnq_u32(a.into()).simd_into(self) }
}
#[inline(always)]
fn shl_u32x4(self, a: u32x4<Self>, shift: u32) -> u32x4<Self> {
unsafe { vshlq_u32(a.into(), vdupq_n_s32(shift.cast_signed())).simd_into(self) }
}
#[inline(always)]
fn shlv_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
unsafe { vshlq_u32(a.into(), vreinterpretq_s32_u32(b.into())).simd_into(self) }
}
#[inline(always)]
fn shr_u32x4(self, a: u32x4<Self>, shift: u32) -> u32x4<Self> {
unsafe { vshlq_u32(a.into(), vdupq_n_s32(-shift.cast_signed())).simd_into(self) }
}
#[inline(always)]
fn shrv_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
unsafe { vshlq_u32(a.into(), vnegq_s32(vreinterpretq_s32_u32(b.into()))).simd_into(self) }
}
#[inline(always)]
fn simd_eq_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> mask32x4<Self> {
unsafe { vreinterpretq_s32_u32(vceqq_u32(a.into(), b.into())).simd_into(self) }
}
#[inline(always)]
fn simd_lt_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> mask32x4<Self> {
unsafe { vreinterpretq_s32_u32(vcltq_u32(a.into(), b.into())).simd_into(self) }
}
#[inline(always)]
fn simd_le_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> mask32x4<Self> {
unsafe { vreinterpretq_s32_u32(vcleq_u32(a.into(), b.into())).simd_into(self) }
}
#[inline(always)]
fn simd_ge_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> mask32x4<Self> {
unsafe { vreinterpretq_s32_u32(vcgeq_u32(a.into(), b.into())).simd_into(self) }
}
#[inline(always)]
fn simd_gt_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> mask32x4<Self> {
unsafe { vreinterpretq_s32_u32(vcgtq_u32(a.into(), b.into())).simd_into(self) }
}
#[inline(always)]
fn zip_low_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
let x = a.into();
let y = b.into();
unsafe { vzip1q_u32(x, y).simd_into(self) }
}
#[inline(always)]
fn zip_high_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
let x = a.into();
let y = b.into();
unsafe { vzip2q_u32(x, y).simd_into(self) }
}
#[inline(always)]
fn unzip_low_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
let x = a.into();
let y = b.into();
unsafe { vuzp1q_u32(x, y).simd_into(self) }
}
#[inline(always)]
fn unzip_high_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
let x = a.into();
let y = b.into();
unsafe { vuzp2q_u32(x, y).simd_into(self) }
}
#[inline(always)]
fn select_u32x4(self, a: mask32x4<Self>, b: u32x4<Self>, c: u32x4<Self>) -> u32x4<Self> {
unsafe { vbslq_u32(vreinterpretq_u32_s32(a.into()), b.into(), c.into()).simd_into(self) }
}
#[inline(always)]
fn min_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
unsafe { vminq_u32(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn max_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x4<Self> {
unsafe { vmaxq_u32(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn combine_u32x4(self, a: u32x4<Self>, b: u32x4<Self>) -> u32x8<Self> {
u32x8 {
val: crate::support::Aligned256(uint32x4x2_t(a.val.0, b.val.0)),
simd: self,
}
}
#[inline(always)]
fn reinterpret_u8_u32x4(self, a: u32x4<Self>) -> u8x16<Self> {
unsafe { vreinterpretq_u8_u32(a.into()).simd_into(self) }
}
#[inline(always)]
fn cvt_f32_u32x4(self, a: u32x4<Self>) -> f32x4<Self> {
unsafe { vcvtq_f32_u32(a.into()).simd_into(self) }
}
#[inline(always)]
fn splat_mask32x4(self, val: i32) -> mask32x4<Self> {
unsafe { vdupq_n_s32(val).simd_into(self) }
}
#[inline(always)]
fn load_array_mask32x4(self, val: [i32; 4usize]) -> mask32x4<Self> {
mask32x4 {
val: unsafe { core::mem::transmute_copy(&val) },
simd: self,
}
}
#[inline(always)]
fn load_array_ref_mask32x4(self, val: &[i32; 4usize]) -> mask32x4<Self> {
mask32x4 {
val: unsafe { core::mem::transmute_copy(val) },
simd: self,
}
}
#[inline(always)]
fn as_array_mask32x4(self, a: mask32x4<Self>) -> [i32; 4usize] {
unsafe { core::mem::transmute::<int32x4_t, [i32; 4usize]>(a.val.0) }
}
#[inline(always)]
fn as_array_ref_mask32x4(self, a: &mask32x4<Self>) -> &[i32; 4usize] {
unsafe { core::mem::transmute::<&int32x4_t, &[i32; 4usize]>(&a.val.0) }
}
#[inline(always)]
fn as_array_mut_mask32x4(self, a: &mut mask32x4<Self>) -> &mut [i32; 4usize] {
unsafe { core::mem::transmute::<&mut int32x4_t, &mut [i32; 4usize]>(&mut a.val.0) }
}
#[inline(always)]
fn store_array_mask32x4(self, a: mask32x4<Self>, dest: &mut [i32; 4usize]) -> () {
unsafe {
core::ptr::copy_nonoverlapping(
(&raw const a.val.0) as *const i32,
dest.as_mut_ptr(),
4usize,
);
}
}
#[inline(always)]
fn cvt_from_bytes_mask32x4(self, a: u8x16<Self>) -> mask32x4<Self> {
unsafe {
mask32x4 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_mask32x4(self, a: mask32x4<Self>) -> u8x16<Self> {
unsafe {
u8x16 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
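// Cross-vector slide: treat `a ++ b` as a byte stream and extract 16 bytes at an
// offset of SHIFT lanes (SHIFT * 4 bytes); `dyn_vext_128` is presumably a
// runtime-offset wrapper around the byte-wise VEXT pattern.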
#[inline(always)]
fn slide_mask32x4<const SHIFT: usize>(
self,
a: mask32x4<Self>,
b: mask32x4<Self>,
) -> mask32x4<Self> {
if SHIFT >= 4usize {
return b;
}
let result = unsafe {
dyn_vext_128(
self.cvt_to_bytes_mask32x4(a).val.0,
self.cvt_to_bytes_mask32x4(b).val.0,
SHIFT * 4usize,
)
};
self.cvt_from_bytes_mask32x4(u8x16 {
val: crate::support::Aligned128(result),
simd: self,
})
}
#[inline(always)]
fn slide_within_blocks_mask32x4<const SHIFT: usize>(
self,
a: mask32x4<Self>,
b: mask32x4<Self>,
) -> mask32x4<Self> {
self.slide_mask32x4::<SHIFT>(a, b)
}
#[inline(always)]
fn and_mask32x4(self, a: mask32x4<Self>, b: mask32x4<Self>) -> mask32x4<Self> {
unsafe { vandq_s32(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn or_mask32x4(self, a: mask32x4<Self>, b: mask32x4<Self>) -> mask32x4<Self> {
unsafe { vorrq_s32(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn xor_mask32x4(self, a: mask32x4<Self>, b: mask32x4<Self>) -> mask32x4<Self> {
unsafe { veorq_s32(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn not_mask32x4(self, a: mask32x4<Self>) -> mask32x4<Self> {
unsafe { vmvnq_s32(a.into()).simd_into(self) }
}
#[inline(always)]
fn select_mask32x4(
self,
a: mask32x4<Self>,
b: mask32x4<Self>,
c: mask32x4<Self>,
) -> mask32x4<Self> {
unsafe { vbslq_s32(vreinterpretq_u32_s32(a.into()), b.into(), c.into()).simd_into(self) }
}
#[inline(always)]
fn simd_eq_mask32x4(self, a: mask32x4<Self>, b: mask32x4<Self>) -> mask32x4<Self> {
unsafe { vreinterpretq_s32_u32(vceqq_s32(a.into(), b.into())).simd_into(self) }
}
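// Mask predicates reduce horizontally with unsigned min/max; `all_true` and
// `any_false` assume canonical masks (each lane all-ones or all-zeros).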
#[inline(always)]
fn any_true_mask32x4(self, a: mask32x4<Self>) -> bool {
unsafe { vmaxvq_u32(vreinterpretq_u32_s32(a.into())) != 0 }
}
#[inline(always)]
fn all_true_mask32x4(self, a: mask32x4<Self>) -> bool {
unsafe { vminvq_u32(vreinterpretq_u32_s32(a.into())) == 0xffffffff }
}
#[inline(always)]
fn any_false_mask32x4(self, a: mask32x4<Self>) -> bool {
unsafe { vminvq_u32(vreinterpretq_u32_s32(a.into())) != 0xffffffff }
}
#[inline(always)]
fn all_false_mask32x4(self, a: mask32x4<Self>) -> bool {
unsafe { vmaxvq_u32(vreinterpretq_u32_s32(a.into())) == 0 }
}
#[inline(always)]
fn combine_mask32x4(self, a: mask32x4<Self>, b: mask32x4<Self>) -> mask32x8<Self> {
mask32x8 {
val: crate::support::Aligned256(int32x4x2_t(a.val.0, b.val.0)),
simd: self,
}
}
#[inline(always)]
fn splat_f64x2(self, val: f64) -> f64x2<Self> {
unsafe { vdupq_n_f64(val).simd_into(self) }
}
#[inline(always)]
fn load_array_f64x2(self, val: [f64; 2usize]) -> f64x2<Self> {
f64x2 {
val: unsafe { core::mem::transmute_copy(&val) },
simd: self,
}
}
#[inline(always)]
fn load_array_ref_f64x2(self, val: &[f64; 2usize]) -> f64x2<Self> {
f64x2 {
val: unsafe { core::mem::transmute_copy(val) },
simd: self,
}
}
#[inline(always)]
fn as_array_f64x2(self, a: f64x2<Self>) -> [f64; 2usize] {
unsafe { core::mem::transmute::<float64x2_t, [f64; 2usize]>(a.val.0) }
}
#[inline(always)]
fn as_array_ref_f64x2(self, a: &f64x2<Self>) -> &[f64; 2usize] {
unsafe { core::mem::transmute::<&float64x2_t, &[f64; 2usize]>(&a.val.0) }
}
#[inline(always)]
fn as_array_mut_f64x2(self, a: &mut f64x2<Self>) -> &mut [f64; 2usize] {
unsafe { core::mem::transmute::<&mut float64x2_t, &mut [f64; 2usize]>(&mut a.val.0) }
}
#[inline(always)]
fn store_array_f64x2(self, a: f64x2<Self>, dest: &mut [f64; 2usize]) -> () {
unsafe {
core::ptr::copy_nonoverlapping(
(&raw const a.val.0) as *const f64,
dest.as_mut_ptr(),
2usize,
);
}
}
#[inline(always)]
fn cvt_from_bytes_f64x2(self, a: u8x16<Self>) -> f64x2<Self> {
unsafe {
f64x2 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_f64x2(self, a: f64x2<Self>) -> u8x16<Self> {
unsafe {
u8x16 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn slide_f64x2<const SHIFT: usize>(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
if SHIFT >= 2usize {
return b;
}
let result = unsafe {
dyn_vext_128(
self.cvt_to_bytes_f64x2(a).val.0,
self.cvt_to_bytes_f64x2(b).val.0,
SHIFT * 8usize,
)
};
self.cvt_from_bytes_f64x2(u8x16 {
val: crate::support::Aligned128(result),
simd: self,
})
}
#[inline(always)]
fn slide_within_blocks_f64x2<const SHIFT: usize>(
self,
a: f64x2<Self>,
b: f64x2<Self>,
) -> f64x2<Self> {
self.slide_f64x2::<SHIFT>(a, b)
}
#[inline(always)]
fn abs_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
unsafe { vabsq_f64(a.into()).simd_into(self) }
}
#[inline(always)]
fn neg_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
unsafe { vnegq_f64(a.into()).simd_into(self) }
}
#[inline(always)]
fn sqrt_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
unsafe { vsqrtq_f64(a.into()).simd_into(self) }
}
#[inline(always)]
fn add_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
unsafe { vaddq_f64(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn sub_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
unsafe { vsubq_f64(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn mul_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
unsafe { vmulq_f64(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn div_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
unsafe { vdivq_f64(a.into(), b.into()).simd_into(self) }
}
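// copysign: bit-select only the sign bit (bit 63) from `b`, keeping the
// magnitude bits of `a`.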
#[inline(always)]
fn copysign_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
unsafe {
let sign_mask = vdupq_n_u64(1u64 << 63);
vbslq_f64(sign_mask, b.into(), a.into()).simd_into(self)
}
}
#[inline(always)]
fn simd_eq_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> mask64x2<Self> {
unsafe { vreinterpretq_s64_u64(vceqq_f64(a.into(), b.into())).simd_into(self) }
}
#[inline(always)]
fn simd_lt_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> mask64x2<Self> {
unsafe { vreinterpretq_s64_u64(vcltq_f64(a.into(), b.into())).simd_into(self) }
}
#[inline(always)]
fn simd_le_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> mask64x2<Self> {
unsafe { vreinterpretq_s64_u64(vcleq_f64(a.into(), b.into())).simd_into(self) }
}
#[inline(always)]
fn simd_ge_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> mask64x2<Self> {
unsafe { vreinterpretq_s64_u64(vcgeq_f64(a.into(), b.into())).simd_into(self) }
}
#[inline(always)]
fn simd_gt_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> mask64x2<Self> {
unsafe { vreinterpretq_s64_u64(vcgtq_f64(a.into(), b.into())).simd_into(self) }
}
#[inline(always)]
fn zip_low_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
let x = a.into();
let y = b.into();
unsafe { vzip1q_f64(x, y).simd_into(self) }
}
#[inline(always)]
fn zip_high_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
let x = a.into();
let y = b.into();
unsafe { vzip2q_f64(x, y).simd_into(self) }
}
#[inline(always)]
fn unzip_low_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
let x = a.into();
let y = b.into();
unsafe { vuzp1q_f64(x, y).simd_into(self) }
}
#[inline(always)]
fn unzip_high_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
let x = a.into();
let y = b.into();
unsafe { vuzp2q_f64(x, y).simd_into(self) }
}
#[inline(always)]
fn max_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
unsafe { vmaxq_f64(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn min_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
unsafe { vminq_f64(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn max_precise_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
unsafe { vmaxnmq_f64(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn min_precise_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x2<Self> {
unsafe { vminnmq_f64(a.into(), b.into()).simd_into(self) }
}
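// `vfmaq_f64(acc, x, y)` computes `acc + x * y`, so passing (c, b, a) yields the
// fused `a * b + c`.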
#[inline(always)]
fn mul_add_f64x2(self, a: f64x2<Self>, b: f64x2<Self>, c: f64x2<Self>) -> f64x2<Self> {
unsafe { vfmaq_f64(c.into(), b.into(), a.into()).simd_into(self) }
}
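// `vfmsq_f64(acc, x, y)` computes `acc - x * y`; negating `c - a * b` gives the
// fused `a * b - c`.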
#[inline(always)]
fn mul_sub_f64x2(self, a: f64x2<Self>, b: f64x2<Self>, c: f64x2<Self>) -> f64x2<Self> {
unsafe { vnegq_f64(vfmsq_f64(c.into(), b.into(), a.into())).simd_into(self) }
}
#[inline(always)]
fn floor_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
unsafe { vrndmq_f64(a.into()).simd_into(self) }
}
#[inline(always)]
fn ceil_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
unsafe { vrndpq_f64(a.into()).simd_into(self) }
}
#[inline(always)]
fn round_ties_even_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
unsafe { vrndnq_f64(a.into()).simd_into(self) }
}
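// Fractional part via an i64 round trip: truncate toward zero, then subtract.
// This assumes lane values are representable in i64; very large inputs saturate
// in the conversion.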
#[inline(always)]
fn fract_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
unsafe {
let c1 = vcvtq_s64_f64(a.into());
let c2 = vcvtq_f64_s64(c1);
vsubq_f64(a.into(), c2).simd_into(self)
}
}
#[inline(always)]
fn trunc_f64x2(self, a: f64x2<Self>) -> f64x2<Self> {
unsafe { vrndq_f64(a.into()).simd_into(self) }
}
#[inline(always)]
fn select_f64x2(self, a: mask64x2<Self>, b: f64x2<Self>, c: f64x2<Self>) -> f64x2<Self> {
unsafe { vbslq_f64(vreinterpretq_u64_s64(a.into()), b.into(), c.into()).simd_into(self) }
}
#[inline(always)]
fn combine_f64x2(self, a: f64x2<Self>, b: f64x2<Self>) -> f64x4<Self> {
f64x4 {
val: crate::support::Aligned256(float64x2x2_t(a.val.0, b.val.0)),
simd: self,
}
}
#[inline(always)]
fn reinterpret_f32_f64x2(self, a: f64x2<Self>) -> f32x4<Self> {
unsafe { vreinterpretq_f32_f64(a.into()).simd_into(self) }
}
#[inline(always)]
fn splat_mask64x2(self, val: i64) -> mask64x2<Self> {
unsafe { vdupq_n_s64(val).simd_into(self) }
}
#[inline(always)]
fn load_array_mask64x2(self, val: [i64; 2usize]) -> mask64x2<Self> {
mask64x2 {
val: unsafe { core::mem::transmute_copy(&val) },
simd: self,
}
}
#[inline(always)]
fn load_array_ref_mask64x2(self, val: &[i64; 2usize]) -> mask64x2<Self> {
mask64x2 {
val: unsafe { core::mem::transmute_copy(val) },
simd: self,
}
}
#[inline(always)]
fn as_array_mask64x2(self, a: mask64x2<Self>) -> [i64; 2usize] {
unsafe { core::mem::transmute::<int64x2_t, [i64; 2usize]>(a.val.0) }
}
#[inline(always)]
fn as_array_ref_mask64x2(self, a: &mask64x2<Self>) -> &[i64; 2usize] {
unsafe { core::mem::transmute::<&int64x2_t, &[i64; 2usize]>(&a.val.0) }
}
#[inline(always)]
fn as_array_mut_mask64x2(self, a: &mut mask64x2<Self>) -> &mut [i64; 2usize] {
unsafe { core::mem::transmute::<&mut int64x2_t, &mut [i64; 2usize]>(&mut a.val.0) }
}
#[inline(always)]
fn store_array_mask64x2(self, a: mask64x2<Self>, dest: &mut [i64; 2usize]) -> () {
unsafe {
core::ptr::copy_nonoverlapping(
(&raw const a.val.0) as *const i64,
dest.as_mut_ptr(),
2usize,
);
}
}
#[inline(always)]
fn cvt_from_bytes_mask64x2(self, a: u8x16<Self>) -> mask64x2<Self> {
unsafe {
mask64x2 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_mask64x2(self, a: mask64x2<Self>) -> u8x16<Self> {
unsafe {
u8x16 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn slide_mask64x2<const SHIFT: usize>(
self,
a: mask64x2<Self>,
b: mask64x2<Self>,
) -> mask64x2<Self> {
if SHIFT >= 2usize {
return b;
}
let result = unsafe {
dyn_vext_128(
self.cvt_to_bytes_mask64x2(a).val.0,
self.cvt_to_bytes_mask64x2(b).val.0,
SHIFT * 8usize,
)
};
self.cvt_from_bytes_mask64x2(u8x16 {
val: crate::support::Aligned128(result),
simd: self,
})
}
#[inline(always)]
fn slide_within_blocks_mask64x2<const SHIFT: usize>(
self,
a: mask64x2<Self>,
b: mask64x2<Self>,
) -> mask64x2<Self> {
self.slide_mask64x2::<SHIFT>(a, b)
}
#[inline(always)]
fn and_mask64x2(self, a: mask64x2<Self>, b: mask64x2<Self>) -> mask64x2<Self> {
unsafe { vandq_s64(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn or_mask64x2(self, a: mask64x2<Self>, b: mask64x2<Self>) -> mask64x2<Self> {
unsafe { vorrq_s64(a.into(), b.into()).simd_into(self) }
}
#[inline(always)]
fn xor_mask64x2(self, a: mask64x2<Self>, b: mask64x2<Self>) -> mask64x2<Self> {
unsafe { veorq_s64(a.into(), b.into()).simd_into(self) }
}
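// NEON has no 64-bit MVN intrinsic, so the mask is complemented as 32-bit lanes.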
#[inline(always)]
fn not_mask64x2(self, a: mask64x2<Self>) -> mask64x2<Self> {
unsafe { vreinterpretq_s64_s32(vmvnq_s32(vreinterpretq_s32_s64(a.into()))).simd_into(self) }
}
#[inline(always)]
fn select_mask64x2(
self,
a: mask64x2<Self>,
b: mask64x2<Self>,
c: mask64x2<Self>,
) -> mask64x2<Self> {
unsafe { vbslq_s64(vreinterpretq_u64_s64(a.into()), b.into(), c.into()).simd_into(self) }
}
#[inline(always)]
fn simd_eq_mask64x2(self, a: mask64x2<Self>, b: mask64x2<Self>) -> mask64x2<Self> {
unsafe { vreinterpretq_s64_u64(vceqq_s64(a.into(), b.into())).simd_into(self) }
}
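// 64-bit mask predicates reuse the 32-bit horizontal reductions; this relies on
// canonical masks where both halves of a 64-bit lane are all-ones or all-zeros.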
#[inline(always)]
fn any_true_mask64x2(self, a: mask64x2<Self>) -> bool {
unsafe { vmaxvq_u32(vreinterpretq_u32_s64(a.into())) != 0 }
}
#[inline(always)]
fn all_true_mask64x2(self, a: mask64x2<Self>) -> bool {
unsafe { vminvq_u32(vreinterpretq_u32_s64(a.into())) == 0xffffffff }
}
#[inline(always)]
fn any_false_mask64x2(self, a: mask64x2<Self>) -> bool {
unsafe { vminvq_u32(vreinterpretq_u32_s64(a.into())) != 0xffffffff }
}
#[inline(always)]
fn all_false_mask64x2(self, a: mask64x2<Self>) -> bool {
unsafe { vmaxvq_u32(vreinterpretq_u32_s64(a.into())) == 0 }
}
#[inline(always)]
fn combine_mask64x2(self, a: mask64x2<Self>, b: mask64x2<Self>) -> mask64x4<Self> {
mask64x4 {
val: crate::support::Aligned256(int64x2x2_t(a.val.0, b.val.0)),
simd: self,
}
}
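// The 256-bit (and 512-bit) types have no native NEON registers; operations on
// them are built by splitting into 128-bit halves, applying the 128-bit
// implementation, and recombining.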
#[inline(always)]
fn splat_f32x8(self, val: f32) -> f32x8<Self> {
let half = self.splat_f32x4(val);
self.combine_f32x4(half, half)
}
#[inline(always)]
fn load_array_f32x8(self, val: [f32; 8usize]) -> f32x8<Self> {
f32x8 {
val: unsafe { core::mem::transmute_copy(&val) },
simd: self,
}
}
#[inline(always)]
fn load_array_ref_f32x8(self, val: &[f32; 8usize]) -> f32x8<Self> {
f32x8 {
val: unsafe { core::mem::transmute_copy(val) },
simd: self,
}
}
#[inline(always)]
fn as_array_f32x8(self, a: f32x8<Self>) -> [f32; 8usize] {
unsafe { core::mem::transmute::<float32x4x2_t, [f32; 8usize]>(a.val.0) }
}
#[inline(always)]
fn as_array_ref_f32x8(self, a: &f32x8<Self>) -> &[f32; 8usize] {
unsafe { core::mem::transmute::<&float32x4x2_t, &[f32; 8usize]>(&a.val.0) }
}
#[inline(always)]
fn as_array_mut_f32x8(self, a: &mut f32x8<Self>) -> &mut [f32; 8usize] {
unsafe { core::mem::transmute::<&mut float32x4x2_t, &mut [f32; 8usize]>(&mut a.val.0) }
}
#[inline(always)]
fn store_array_f32x8(self, a: f32x8<Self>, dest: &mut [f32; 8usize]) -> () {
unsafe {
core::ptr::copy_nonoverlapping(
(&raw const a.val.0) as *const f32,
dest.as_mut_ptr(),
8usize,
);
}
}
#[inline(always)]
fn cvt_from_bytes_f32x8(self, a: u8x32<Self>) -> f32x8<Self> {
unsafe {
f32x8 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_f32x8(self, a: f32x8<Self>) -> u8x32<Self> {
unsafe {
u8x32 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
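// 256-bit slide emulated per 128-bit block: `cross_block_slide_blocks_at`
// presumably selects which pair of input blocks feeds each output block, and the
// byte-wise extract finishes the within-block shift.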
#[inline(always)]
fn slide_f32x8<const SHIFT: usize>(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
if SHIFT >= 8usize {
return b;
}
let result = unsafe {
let a_bytes = self.cvt_to_bytes_f32x8(a).val.0;
let b_bytes = self.cvt_to_bytes_f32x8(b).val.0;
let a_blocks = [a_bytes.0, a_bytes.1];
let b_blocks = [b_bytes.0, b_bytes.1];
let shift_bytes = SHIFT * 4usize;
uint8x16x2_t(
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
0,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
1,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
)
};
self.cvt_from_bytes_f32x8(u8x32 {
val: crate::support::Aligned256(result),
simd: self,
})
}
#[inline(always)]
fn slide_within_blocks_f32x8<const SHIFT: usize>(
self,
a: f32x8<Self>,
b: f32x8<Self>,
) -> f32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
let (b0, b1) = self.split_f32x8(b);
self.combine_f32x4(
self.slide_within_blocks_f32x4::<SHIFT>(a0, b0),
self.slide_within_blocks_f32x4::<SHIFT>(a1, b1),
)
}
#[inline(always)]
fn abs_f32x8(self, a: f32x8<Self>) -> f32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
self.combine_f32x4(self.abs_f32x4(a0), self.abs_f32x4(a1))
}
#[inline(always)]
fn neg_f32x8(self, a: f32x8<Self>) -> f32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
self.combine_f32x4(self.neg_f32x4(a0), self.neg_f32x4(a1))
}
#[inline(always)]
fn sqrt_f32x8(self, a: f32x8<Self>) -> f32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
self.combine_f32x4(self.sqrt_f32x4(a0), self.sqrt_f32x4(a1))
}
#[inline(always)]
fn add_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
let (b0, b1) = self.split_f32x8(b);
self.combine_f32x4(self.add_f32x4(a0, b0), self.add_f32x4(a1, b1))
}
#[inline(always)]
fn sub_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
let (b0, b1) = self.split_f32x8(b);
self.combine_f32x4(self.sub_f32x4(a0, b0), self.sub_f32x4(a1, b1))
}
#[inline(always)]
fn mul_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
let (b0, b1) = self.split_f32x8(b);
self.combine_f32x4(self.mul_f32x4(a0, b0), self.mul_f32x4(a1, b1))
}
#[inline(always)]
fn div_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
let (b0, b1) = self.split_f32x8(b);
self.combine_f32x4(self.div_f32x4(a0, b0), self.div_f32x4(a1, b1))
}
#[inline(always)]
fn copysign_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
let (b0, b1) = self.split_f32x8(b);
self.combine_f32x4(self.copysign_f32x4(a0, b0), self.copysign_f32x4(a1, b1))
}
#[inline(always)]
fn simd_eq_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> mask32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
let (b0, b1) = self.split_f32x8(b);
self.combine_mask32x4(self.simd_eq_f32x4(a0, b0), self.simd_eq_f32x4(a1, b1))
}
#[inline(always)]
fn simd_lt_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> mask32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
let (b0, b1) = self.split_f32x8(b);
self.combine_mask32x4(self.simd_lt_f32x4(a0, b0), self.simd_lt_f32x4(a1, b1))
}
#[inline(always)]
fn simd_le_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> mask32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
let (b0, b1) = self.split_f32x8(b);
self.combine_mask32x4(self.simd_le_f32x4(a0, b0), self.simd_le_f32x4(a1, b1))
}
#[inline(always)]
fn simd_ge_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> mask32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
let (b0, b1) = self.split_f32x8(b);
self.combine_mask32x4(self.simd_ge_f32x4(a0, b0), self.simd_ge_f32x4(a1, b1))
}
#[inline(always)]
fn simd_gt_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> mask32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
let (b0, b1) = self.split_f32x8(b);
self.combine_mask32x4(self.simd_gt_f32x4(a0, b0), self.simd_gt_f32x4(a1, b1))
}
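// For the 256-bit type, zip_low interleaves the low 128-bit halves of `a` and
// `b`; zip_high does the same with the high halves.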
#[inline(always)]
fn zip_low_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
let (a0, _) = self.split_f32x8(a);
let (b0, _) = self.split_f32x8(b);
self.combine_f32x4(self.zip_low_f32x4(a0, b0), self.zip_high_f32x4(a0, b0))
}
#[inline(always)]
fn zip_high_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
let (_, a1) = self.split_f32x8(a);
let (_, b1) = self.split_f32x8(b);
self.combine_f32x4(self.zip_low_f32x4(a1, b1), self.zip_high_f32x4(a1, b1))
}
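// unzip_low gathers the even lanes of `a` followed by the even lanes of `b`, so
// each operand is de-interleaved within itself before recombining.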
#[inline(always)]
fn unzip_low_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
let (b0, b1) = self.split_f32x8(b);
self.combine_f32x4(self.unzip_low_f32x4(a0, a1), self.unzip_low_f32x4(b0, b1))
}
#[inline(always)]
fn unzip_high_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
let (b0, b1) = self.split_f32x8(b);
self.combine_f32x4(self.unzip_high_f32x4(a0, a1), self.unzip_high_f32x4(b0, b1))
}
#[inline(always)]
fn max_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
let (b0, b1) = self.split_f32x8(b);
self.combine_f32x4(self.max_f32x4(a0, b0), self.max_f32x4(a1, b1))
}
#[inline(always)]
fn min_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
let (b0, b1) = self.split_f32x8(b);
self.combine_f32x4(self.min_f32x4(a0, b0), self.min_f32x4(a1, b1))
}
#[inline(always)]
fn max_precise_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
let (b0, b1) = self.split_f32x8(b);
self.combine_f32x4(
self.max_precise_f32x4(a0, b0),
self.max_precise_f32x4(a1, b1),
)
}
#[inline(always)]
fn min_precise_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
let (b0, b1) = self.split_f32x8(b);
self.combine_f32x4(
self.min_precise_f32x4(a0, b0),
self.min_precise_f32x4(a1, b1),
)
}
#[inline(always)]
fn mul_add_f32x8(self, a: f32x8<Self>, b: f32x8<Self>, c: f32x8<Self>) -> f32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
let (b0, b1) = self.split_f32x8(b);
let (c0, c1) = self.split_f32x8(c);
self.combine_f32x4(
self.mul_add_f32x4(a0, b0, c0),
self.mul_add_f32x4(a1, b1, c1),
)
}
#[inline(always)]
fn mul_sub_f32x8(self, a: f32x8<Self>, b: f32x8<Self>, c: f32x8<Self>) -> f32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
let (b0, b1) = self.split_f32x8(b);
let (c0, c1) = self.split_f32x8(c);
self.combine_f32x4(
self.mul_sub_f32x4(a0, b0, c0),
self.mul_sub_f32x4(a1, b1, c1),
)
}
#[inline(always)]
fn floor_f32x8(self, a: f32x8<Self>) -> f32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
self.combine_f32x4(self.floor_f32x4(a0), self.floor_f32x4(a1))
}
#[inline(always)]
fn ceil_f32x8(self, a: f32x8<Self>) -> f32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
self.combine_f32x4(self.ceil_f32x4(a0), self.ceil_f32x4(a1))
}
#[inline(always)]
fn round_ties_even_f32x8(self, a: f32x8<Self>) -> f32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
self.combine_f32x4(
self.round_ties_even_f32x4(a0),
self.round_ties_even_f32x4(a1),
)
}
#[inline(always)]
fn fract_f32x8(self, a: f32x8<Self>) -> f32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
self.combine_f32x4(self.fract_f32x4(a0), self.fract_f32x4(a1))
}
#[inline(always)]
fn trunc_f32x8(self, a: f32x8<Self>) -> f32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
self.combine_f32x4(self.trunc_f32x4(a0), self.trunc_f32x4(a1))
}
#[inline(always)]
fn select_f32x8(self, a: mask32x8<Self>, b: f32x8<Self>, c: f32x8<Self>) -> f32x8<Self> {
let (a0, a1) = self.split_mask32x8(a);
let (b0, b1) = self.split_f32x8(b);
let (c0, c1) = self.split_f32x8(c);
self.combine_f32x4(self.select_f32x4(a0, b0, c0), self.select_f32x4(a1, b1, c1))
}
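// combine/split only repack the underlying 128-bit registers; no lane shuffling
// is performed.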
#[inline(always)]
fn combine_f32x8(self, a: f32x8<Self>, b: f32x8<Self>) -> f32x16<Self> {
f32x16 {
val: crate::support::Aligned512(float32x4x4_t(
a.val.0.0, a.val.0.1, b.val.0.0, b.val.0.1,
)),
simd: self,
}
}
#[inline(always)]
fn split_f32x8(self, a: f32x8<Self>) -> (f32x4<Self>, f32x4<Self>) {
(
f32x4 {
val: crate::support::Aligned128(a.val.0.0),
simd: self,
},
f32x4 {
val: crate::support::Aligned128(a.val.0.1),
simd: self,
},
)
}
#[inline(always)]
fn reinterpret_f64_f32x8(self, a: f32x8<Self>) -> f64x4<Self> {
let (a0, a1) = self.split_f32x8(a);
self.combine_f64x2(
self.reinterpret_f64_f32x4(a0),
self.reinterpret_f64_f32x4(a1),
)
}
#[inline(always)]
fn reinterpret_i32_f32x8(self, a: f32x8<Self>) -> i32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
self.combine_i32x4(
self.reinterpret_i32_f32x4(a0),
self.reinterpret_i32_f32x4(a1),
)
}
#[inline(always)]
fn reinterpret_u8_f32x8(self, a: f32x8<Self>) -> u8x32<Self> {
let (a0, a1) = self.split_f32x8(a);
self.combine_u8x16(self.reinterpret_u8_f32x4(a0), self.reinterpret_u8_f32x4(a1))
}
#[inline(always)]
fn reinterpret_u32_f32x8(self, a: f32x8<Self>) -> u32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
self.combine_u32x4(
self.reinterpret_u32_f32x4(a0),
self.reinterpret_u32_f32x4(a1),
)
}
#[inline(always)]
fn cvt_u32_f32x8(self, a: f32x8<Self>) -> u32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
self.combine_u32x4(self.cvt_u32_f32x4(a0), self.cvt_u32_f32x4(a1))
}
#[inline(always)]
fn cvt_u32_precise_f32x8(self, a: f32x8<Self>) -> u32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
self.combine_u32x4(
self.cvt_u32_precise_f32x4(a0),
self.cvt_u32_precise_f32x4(a1),
)
}
#[inline(always)]
fn cvt_i32_f32x8(self, a: f32x8<Self>) -> i32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
self.combine_i32x4(self.cvt_i32_f32x4(a0), self.cvt_i32_f32x4(a1))
}
#[inline(always)]
fn cvt_i32_precise_f32x8(self, a: f32x8<Self>) -> i32x8<Self> {
let (a0, a1) = self.split_f32x8(a);
self.combine_i32x4(
self.cvt_i32_precise_f32x4(a0),
self.cvt_i32_precise_f32x4(a1),
)
}
#[inline(always)]
fn splat_i8x32(self, val: i8) -> i8x32<Self> {
let half = self.splat_i8x16(val);
self.combine_i8x16(half, half)
}
#[inline(always)]
fn load_array_i8x32(self, val: [i8; 32usize]) -> i8x32<Self> {
i8x32 {
val: unsafe { core::mem::transmute_copy(&val) },
simd: self,
}
}
#[inline(always)]
fn load_array_ref_i8x32(self, val: &[i8; 32usize]) -> i8x32<Self> {
i8x32 {
val: unsafe { core::mem::transmute_copy(val) },
simd: self,
}
}
#[inline(always)]
fn as_array_i8x32(self, a: i8x32<Self>) -> [i8; 32usize] {
unsafe { core::mem::transmute::<int8x16x2_t, [i8; 32usize]>(a.val.0) }
}
#[inline(always)]
fn as_array_ref_i8x32(self, a: &i8x32<Self>) -> &[i8; 32usize] {
unsafe { core::mem::transmute::<&int8x16x2_t, &[i8; 32usize]>(&a.val.0) }
}
#[inline(always)]
fn as_array_mut_i8x32(self, a: &mut i8x32<Self>) -> &mut [i8; 32usize] {
unsafe { core::mem::transmute::<&mut int8x16x2_t, &mut [i8; 32usize]>(&mut a.val.0) }
}
#[inline(always)]
fn store_array_i8x32(self, a: i8x32<Self>, dest: &mut [i8; 32usize]) -> () {
unsafe {
core::ptr::copy_nonoverlapping(
(&raw const a.val.0) as *const i8,
dest.as_mut_ptr(),
32usize,
);
}
}
#[inline(always)]
fn cvt_from_bytes_i8x32(self, a: u8x32<Self>) -> i8x32<Self> {
unsafe {
i8x32 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_i8x32(self, a: i8x32<Self>) -> u8x32<Self> {
unsafe {
u8x32 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn slide_i8x32<const SHIFT: usize>(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
if SHIFT >= 32usize {
return b;
}
let result = unsafe {
let a_bytes = self.cvt_to_bytes_i8x32(a).val.0;
let b_bytes = self.cvt_to_bytes_i8x32(b).val.0;
let a_blocks = [a_bytes.0, a_bytes.1];
let b_blocks = [b_bytes.0, b_bytes.1];
let shift_bytes = SHIFT;
uint8x16x2_t(
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
0,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
1,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
)
};
self.cvt_from_bytes_i8x32(u8x32 {
val: crate::support::Aligned256(result),
simd: self,
})
}
#[inline(always)]
fn slide_within_blocks_i8x32<const SHIFT: usize>(
self,
a: i8x32<Self>,
b: i8x32<Self>,
) -> i8x32<Self> {
let (a0, a1) = self.split_i8x32(a);
let (b0, b1) = self.split_i8x32(b);
self.combine_i8x16(
self.slide_within_blocks_i8x16::<SHIFT>(a0, b0),
self.slide_within_blocks_i8x16::<SHIFT>(a1, b1),
)
}
#[inline(always)]
fn add_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
let (a0, a1) = self.split_i8x32(a);
let (b0, b1) = self.split_i8x32(b);
self.combine_i8x16(self.add_i8x16(a0, b0), self.add_i8x16(a1, b1))
}
#[inline(always)]
fn sub_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
let (a0, a1) = self.split_i8x32(a);
let (b0, b1) = self.split_i8x32(b);
self.combine_i8x16(self.sub_i8x16(a0, b0), self.sub_i8x16(a1, b1))
}
#[inline(always)]
fn mul_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
let (a0, a1) = self.split_i8x32(a);
let (b0, b1) = self.split_i8x32(b);
self.combine_i8x16(self.mul_i8x16(a0, b0), self.mul_i8x16(a1, b1))
}
#[inline(always)]
fn and_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
let (a0, a1) = self.split_i8x32(a);
let (b0, b1) = self.split_i8x32(b);
self.combine_i8x16(self.and_i8x16(a0, b0), self.and_i8x16(a1, b1))
}
#[inline(always)]
fn or_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
let (a0, a1) = self.split_i8x32(a);
let (b0, b1) = self.split_i8x32(b);
self.combine_i8x16(self.or_i8x16(a0, b0), self.or_i8x16(a1, b1))
}
#[inline(always)]
fn xor_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
let (a0, a1) = self.split_i8x32(a);
let (b0, b1) = self.split_i8x32(b);
self.combine_i8x16(self.xor_i8x16(a0, b0), self.xor_i8x16(a1, b1))
}
#[inline(always)]
fn not_i8x32(self, a: i8x32<Self>) -> i8x32<Self> {
let (a0, a1) = self.split_i8x32(a);
self.combine_i8x16(self.not_i8x16(a0), self.not_i8x16(a1))
}
#[inline(always)]
fn shl_i8x32(self, a: i8x32<Self>, shift: u32) -> i8x32<Self> {
let (a0, a1) = self.split_i8x32(a);
self.combine_i8x16(self.shl_i8x16(a0, shift), self.shl_i8x16(a1, shift))
}
#[inline(always)]
fn shlv_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
let (a0, a1) = self.split_i8x32(a);
let (b0, b1) = self.split_i8x32(b);
self.combine_i8x16(self.shlv_i8x16(a0, b0), self.shlv_i8x16(a1, b1))
}
#[inline(always)]
fn shr_i8x32(self, a: i8x32<Self>, shift: u32) -> i8x32<Self> {
let (a0, a1) = self.split_i8x32(a);
self.combine_i8x16(self.shr_i8x16(a0, shift), self.shr_i8x16(a1, shift))
}
#[inline(always)]
fn shrv_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
let (a0, a1) = self.split_i8x32(a);
let (b0, b1) = self.split_i8x32(b);
self.combine_i8x16(self.shrv_i8x16(a0, b0), self.shrv_i8x16(a1, b1))
}
#[inline(always)]
fn simd_eq_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> mask8x32<Self> {
let (a0, a1) = self.split_i8x32(a);
let (b0, b1) = self.split_i8x32(b);
self.combine_mask8x16(self.simd_eq_i8x16(a0, b0), self.simd_eq_i8x16(a1, b1))
}
#[inline(always)]
fn simd_lt_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> mask8x32<Self> {
let (a0, a1) = self.split_i8x32(a);
let (b0, b1) = self.split_i8x32(b);
self.combine_mask8x16(self.simd_lt_i8x16(a0, b0), self.simd_lt_i8x16(a1, b1))
}
#[inline(always)]
fn simd_le_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> mask8x32<Self> {
let (a0, a1) = self.split_i8x32(a);
let (b0, b1) = self.split_i8x32(b);
self.combine_mask8x16(self.simd_le_i8x16(a0, b0), self.simd_le_i8x16(a1, b1))
}
#[inline(always)]
fn simd_ge_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> mask8x32<Self> {
let (a0, a1) = self.split_i8x32(a);
let (b0, b1) = self.split_i8x32(b);
self.combine_mask8x16(self.simd_ge_i8x16(a0, b0), self.simd_ge_i8x16(a1, b1))
}
#[inline(always)]
fn simd_gt_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> mask8x32<Self> {
let (a0, a1) = self.split_i8x32(a);
let (b0, b1) = self.split_i8x32(b);
self.combine_mask8x16(self.simd_gt_i8x16(a0, b0), self.simd_gt_i8x16(a1, b1))
}
#[inline(always)]
fn zip_low_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
let (a0, _) = self.split_i8x32(a);
let (b0, _) = self.split_i8x32(b);
self.combine_i8x16(self.zip_low_i8x16(a0, b0), self.zip_high_i8x16(a0, b0))
}
#[inline(always)]
fn zip_high_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
let (_, a1) = self.split_i8x32(a);
let (_, b1) = self.split_i8x32(b);
self.combine_i8x16(self.zip_low_i8x16(a1, b1), self.zip_high_i8x16(a1, b1))
}
#[inline(always)]
fn unzip_low_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
let (a0, a1) = self.split_i8x32(a);
let (b0, b1) = self.split_i8x32(b);
self.combine_i8x16(self.unzip_low_i8x16(a0, a1), self.unzip_low_i8x16(b0, b1))
}
#[inline(always)]
fn unzip_high_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
let (a0, a1) = self.split_i8x32(a);
let (b0, b1) = self.split_i8x32(b);
self.combine_i8x16(self.unzip_high_i8x16(a0, a1), self.unzip_high_i8x16(b0, b1))
}
#[inline(always)]
fn select_i8x32(self, a: mask8x32<Self>, b: i8x32<Self>, c: i8x32<Self>) -> i8x32<Self> {
let (a0, a1) = self.split_mask8x32(a);
let (b0, b1) = self.split_i8x32(b);
let (c0, c1) = self.split_i8x32(c);
self.combine_i8x16(self.select_i8x16(a0, b0, c0), self.select_i8x16(a1, b1, c1))
}
#[inline(always)]
fn min_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
let (a0, a1) = self.split_i8x32(a);
let (b0, b1) = self.split_i8x32(b);
self.combine_i8x16(self.min_i8x16(a0, b0), self.min_i8x16(a1, b1))
}
#[inline(always)]
fn max_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x32<Self> {
let (a0, a1) = self.split_i8x32(a);
let (b0, b1) = self.split_i8x32(b);
self.combine_i8x16(self.max_i8x16(a0, b0), self.max_i8x16(a1, b1))
}
#[inline(always)]
fn combine_i8x32(self, a: i8x32<Self>, b: i8x32<Self>) -> i8x64<Self> {
i8x64 {
val: crate::support::Aligned512(int8x16x4_t(
a.val.0.0, a.val.0.1, b.val.0.0, b.val.0.1,
)),
simd: self,
}
}
#[inline(always)]
fn split_i8x32(self, a: i8x32<Self>) -> (i8x16<Self>, i8x16<Self>) {
(
i8x16 {
val: crate::support::Aligned128(a.val.0.0),
simd: self,
},
i8x16 {
val: crate::support::Aligned128(a.val.0.1),
simd: self,
},
)
}
#[inline(always)]
fn neg_i8x32(self, a: i8x32<Self>) -> i8x32<Self> {
let (a0, a1) = self.split_i8x32(a);
self.combine_i8x16(self.neg_i8x16(a0), self.neg_i8x16(a1))
}
#[inline(always)]
fn reinterpret_u8_i8x32(self, a: i8x32<Self>) -> u8x32<Self> {
let (a0, a1) = self.split_i8x32(a);
self.combine_u8x16(self.reinterpret_u8_i8x16(a0), self.reinterpret_u8_i8x16(a1))
}
#[inline(always)]
fn reinterpret_u32_i8x32(self, a: i8x32<Self>) -> u32x8<Self> {
let (a0, a1) = self.split_i8x32(a);
self.combine_u32x4(
self.reinterpret_u32_i8x16(a0),
self.reinterpret_u32_i8x16(a1),
)
}
#[inline(always)]
fn splat_u8x32(self, val: u8) -> u8x32<Self> {
let half = self.splat_u8x16(val);
self.combine_u8x16(half, half)
}
#[inline(always)]
fn load_array_u8x32(self, val: [u8; 32usize]) -> u8x32<Self> {
u8x32 {
val: unsafe { core::mem::transmute_copy(&val) },
simd: self,
}
}
#[inline(always)]
fn load_array_ref_u8x32(self, val: &[u8; 32usize]) -> u8x32<Self> {
u8x32 {
val: unsafe { core::mem::transmute_copy(val) },
simd: self,
}
}
#[inline(always)]
fn as_array_u8x32(self, a: u8x32<Self>) -> [u8; 32usize] {
unsafe { core::mem::transmute::<uint8x16x2_t, [u8; 32usize]>(a.val.0) }
}
#[inline(always)]
fn as_array_ref_u8x32(self, a: &u8x32<Self>) -> &[u8; 32usize] {
unsafe { core::mem::transmute::<&uint8x16x2_t, &[u8; 32usize]>(&a.val.0) }
}
#[inline(always)]
fn as_array_mut_u8x32(self, a: &mut u8x32<Self>) -> &mut [u8; 32usize] {
unsafe { core::mem::transmute::<&mut uint8x16x2_t, &mut [u8; 32usize]>(&mut a.val.0) }
}
#[inline(always)]
fn store_array_u8x32(self, a: u8x32<Self>, dest: &mut [u8; 32usize]) -> () {
unsafe {
core::ptr::copy_nonoverlapping(
(&raw const a.val.0) as *const u8,
dest.as_mut_ptr(),
32usize,
);
}
}
#[inline(always)]
fn cvt_from_bytes_u8x32(self, a: u8x32<Self>) -> u8x32<Self> {
unsafe {
u8x32 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_u8x32(self, a: u8x32<Self>) -> u8x32<Self> {
unsafe {
u8x32 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn slide_u8x32<const SHIFT: usize>(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
if SHIFT >= 32usize {
return b;
}
let result = unsafe {
let a_bytes = self.cvt_to_bytes_u8x32(a).val.0;
let b_bytes = self.cvt_to_bytes_u8x32(b).val.0;
let a_blocks = [a_bytes.0, a_bytes.1];
let b_blocks = [b_bytes.0, b_bytes.1];
let shift_bytes = SHIFT;
uint8x16x2_t(
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
0,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
1,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
)
};
self.cvt_from_bytes_u8x32(u8x32 {
val: crate::support::Aligned256(result),
simd: self,
})
}
#[inline(always)]
fn slide_within_blocks_u8x32<const SHIFT: usize>(
self,
a: u8x32<Self>,
b: u8x32<Self>,
) -> u8x32<Self> {
let (a0, a1) = self.split_u8x32(a);
let (b0, b1) = self.split_u8x32(b);
self.combine_u8x16(
self.slide_within_blocks_u8x16::<SHIFT>(a0, b0),
self.slide_within_blocks_u8x16::<SHIFT>(a1, b1),
)
}
#[inline(always)]
fn add_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
let (a0, a1) = self.split_u8x32(a);
let (b0, b1) = self.split_u8x32(b);
self.combine_u8x16(self.add_u8x16(a0, b0), self.add_u8x16(a1, b1))
}
#[inline(always)]
fn sub_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
let (a0, a1) = self.split_u8x32(a);
let (b0, b1) = self.split_u8x32(b);
self.combine_u8x16(self.sub_u8x16(a0, b0), self.sub_u8x16(a1, b1))
}
#[inline(always)]
fn mul_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
let (a0, a1) = self.split_u8x32(a);
let (b0, b1) = self.split_u8x32(b);
self.combine_u8x16(self.mul_u8x16(a0, b0), self.mul_u8x16(a1, b1))
}
#[inline(always)]
fn and_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
let (a0, a1) = self.split_u8x32(a);
let (b0, b1) = self.split_u8x32(b);
self.combine_u8x16(self.and_u8x16(a0, b0), self.and_u8x16(a1, b1))
}
#[inline(always)]
fn or_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
let (a0, a1) = self.split_u8x32(a);
let (b0, b1) = self.split_u8x32(b);
self.combine_u8x16(self.or_u8x16(a0, b0), self.or_u8x16(a1, b1))
}
#[inline(always)]
fn xor_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
let (a0, a1) = self.split_u8x32(a);
let (b0, b1) = self.split_u8x32(b);
self.combine_u8x16(self.xor_u8x16(a0, b0), self.xor_u8x16(a1, b1))
}
#[inline(always)]
fn not_u8x32(self, a: u8x32<Self>) -> u8x32<Self> {
let (a0, a1) = self.split_u8x32(a);
self.combine_u8x16(self.not_u8x16(a0), self.not_u8x16(a1))
}
#[inline(always)]
fn shl_u8x32(self, a: u8x32<Self>, shift: u32) -> u8x32<Self> {
let (a0, a1) = self.split_u8x32(a);
self.combine_u8x16(self.shl_u8x16(a0, shift), self.shl_u8x16(a1, shift))
}
#[inline(always)]
fn shlv_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
let (a0, a1) = self.split_u8x32(a);
let (b0, b1) = self.split_u8x32(b);
self.combine_u8x16(self.shlv_u8x16(a0, b0), self.shlv_u8x16(a1, b1))
}
#[inline(always)]
fn shr_u8x32(self, a: u8x32<Self>, shift: u32) -> u8x32<Self> {
let (a0, a1) = self.split_u8x32(a);
self.combine_u8x16(self.shr_u8x16(a0, shift), self.shr_u8x16(a1, shift))
}
#[inline(always)]
fn shrv_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
let (a0, a1) = self.split_u8x32(a);
let (b0, b1) = self.split_u8x32(b);
self.combine_u8x16(self.shrv_u8x16(a0, b0), self.shrv_u8x16(a1, b1))
}
#[inline(always)]
fn simd_eq_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> mask8x32<Self> {
let (a0, a1) = self.split_u8x32(a);
let (b0, b1) = self.split_u8x32(b);
self.combine_mask8x16(self.simd_eq_u8x16(a0, b0), self.simd_eq_u8x16(a1, b1))
}
#[inline(always)]
fn simd_lt_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> mask8x32<Self> {
let (a0, a1) = self.split_u8x32(a);
let (b0, b1) = self.split_u8x32(b);
self.combine_mask8x16(self.simd_lt_u8x16(a0, b0), self.simd_lt_u8x16(a1, b1))
}
#[inline(always)]
fn simd_le_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> mask8x32<Self> {
let (a0, a1) = self.split_u8x32(a);
let (b0, b1) = self.split_u8x32(b);
self.combine_mask8x16(self.simd_le_u8x16(a0, b0), self.simd_le_u8x16(a1, b1))
}
#[inline(always)]
fn simd_ge_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> mask8x32<Self> {
let (a0, a1) = self.split_u8x32(a);
let (b0, b1) = self.split_u8x32(b);
self.combine_mask8x16(self.simd_ge_u8x16(a0, b0), self.simd_ge_u8x16(a1, b1))
}
#[inline(always)]
fn simd_gt_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> mask8x32<Self> {
let (a0, a1) = self.split_u8x32(a);
let (b0, b1) = self.split_u8x32(b);
self.combine_mask8x16(self.simd_gt_u8x16(a0, b0), self.simd_gt_u8x16(a1, b1))
}
#[inline(always)]
fn zip_low_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
let (a0, _) = self.split_u8x32(a);
let (b0, _) = self.split_u8x32(b);
self.combine_u8x16(self.zip_low_u8x16(a0, b0), self.zip_high_u8x16(a0, b0))
}
#[inline(always)]
fn zip_high_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
let (_, a1) = self.split_u8x32(a);
let (_, b1) = self.split_u8x32(b);
self.combine_u8x16(self.zip_low_u8x16(a1, b1), self.zip_high_u8x16(a1, b1))
}
#[inline(always)]
fn unzip_low_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
let (a0, a1) = self.split_u8x32(a);
let (b0, b1) = self.split_u8x32(b);
self.combine_u8x16(self.unzip_low_u8x16(a0, a1), self.unzip_low_u8x16(b0, b1))
}
#[inline(always)]
fn unzip_high_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
let (a0, a1) = self.split_u8x32(a);
let (b0, b1) = self.split_u8x32(b);
self.combine_u8x16(self.unzip_high_u8x16(a0, a1), self.unzip_high_u8x16(b0, b1))
}
#[inline(always)]
fn select_u8x32(self, a: mask8x32<Self>, b: u8x32<Self>, c: u8x32<Self>) -> u8x32<Self> {
let (a0, a1) = self.split_mask8x32(a);
let (b0, b1) = self.split_u8x32(b);
let (c0, c1) = self.split_u8x32(c);
self.combine_u8x16(self.select_u8x16(a0, b0, c0), self.select_u8x16(a1, b1, c1))
}
#[inline(always)]
fn min_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
let (a0, a1) = self.split_u8x32(a);
let (b0, b1) = self.split_u8x32(b);
self.combine_u8x16(self.min_u8x16(a0, b0), self.min_u8x16(a1, b1))
}
#[inline(always)]
fn max_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x32<Self> {
let (a0, a1) = self.split_u8x32(a);
let (b0, b1) = self.split_u8x32(b);
self.combine_u8x16(self.max_u8x16(a0, b0), self.max_u8x16(a1, b1))
}
#[inline(always)]
fn combine_u8x32(self, a: u8x32<Self>, b: u8x32<Self>) -> u8x64<Self> {
u8x64 {
val: crate::support::Aligned512(uint8x16x4_t(
a.val.0.0, a.val.0.1, b.val.0.0, b.val.0.1,
)),
simd: self,
}
}
#[inline(always)]
fn split_u8x32(self, a: u8x32<Self>) -> (u8x16<Self>, u8x16<Self>) {
(
u8x16 {
val: crate::support::Aligned128(a.val.0.0),
simd: self,
},
u8x16 {
val: crate::support::Aligned128(a.val.0.1),
simd: self,
},
)
}
#[inline(always)]
fn widen_u8x32(self, a: u8x32<Self>) -> u16x32<Self> {
let (a0, a1) = self.split_u8x32(a);
self.combine_u16x16(self.widen_u8x16(a0), self.widen_u8x16(a1))
}
#[inline(always)]
fn reinterpret_u32_u8x32(self, a: u8x32<Self>) -> u32x8<Self> {
let (a0, a1) = self.split_u8x32(a);
self.combine_u32x4(
self.reinterpret_u32_u8x16(a0),
self.reinterpret_u32_u8x16(a1),
)
}
#[inline(always)]
fn splat_mask8x32(self, val: i8) -> mask8x32<Self> {
let half = self.splat_mask8x16(val);
self.combine_mask8x16(half, half)
}
#[inline(always)]
fn load_array_mask8x32(self, val: [i8; 32usize]) -> mask8x32<Self> {
mask8x32 {
val: unsafe { core::mem::transmute_copy(&val) },
simd: self,
}
}
#[inline(always)]
fn load_array_ref_mask8x32(self, val: &[i8; 32usize]) -> mask8x32<Self> {
mask8x32 {
val: unsafe { core::mem::transmute_copy(val) },
simd: self,
}
}
#[inline(always)]
fn as_array_mask8x32(self, a: mask8x32<Self>) -> [i8; 32usize] {
unsafe { core::mem::transmute::<int8x16x2_t, [i8; 32usize]>(a.val.0) }
}
#[inline(always)]
fn as_array_ref_mask8x32(self, a: &mask8x32<Self>) -> &[i8; 32usize] {
unsafe { core::mem::transmute::<&int8x16x2_t, &[i8; 32usize]>(&a.val.0) }
}
#[inline(always)]
fn as_array_mut_mask8x32(self, a: &mut mask8x32<Self>) -> &mut [i8; 32usize] {
unsafe { core::mem::transmute::<&mut int8x16x2_t, &mut [i8; 32usize]>(&mut a.val.0) }
}
#[inline(always)]
fn store_array_mask8x32(self, a: mask8x32<Self>, dest: &mut [i8; 32usize]) -> () {
unsafe {
core::ptr::copy_nonoverlapping(
(&raw const a.val.0) as *const i8,
dest.as_mut_ptr(),
32usize,
);
}
}
#[inline(always)]
fn cvt_from_bytes_mask8x32(self, a: u8x32<Self>) -> mask8x32<Self> {
unsafe {
mask8x32 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_mask8x32(self, a: mask8x32<Self>) -> u8x32<Self> {
unsafe {
u8x32 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn slide_mask8x32<const SHIFT: usize>(
self,
a: mask8x32<Self>,
b: mask8x32<Self>,
) -> mask8x32<Self> {
if SHIFT >= 32usize {
return b;
}
let result = unsafe {
let a_bytes = self.cvt_to_bytes_mask8x32(a).val.0;
let b_bytes = self.cvt_to_bytes_mask8x32(b).val.0;
let a_blocks = [a_bytes.0, a_bytes.1];
let b_blocks = [b_bytes.0, b_bytes.1];
let shift_bytes = SHIFT;
uint8x16x2_t(
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
0,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
1,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
)
};
self.cvt_from_bytes_mask8x32(u8x32 {
val: crate::support::Aligned256(result),
simd: self,
})
}
#[inline(always)]
fn slide_within_blocks_mask8x32<const SHIFT: usize>(
self,
a: mask8x32<Self>,
b: mask8x32<Self>,
) -> mask8x32<Self> {
let (a0, a1) = self.split_mask8x32(a);
let (b0, b1) = self.split_mask8x32(b);
self.combine_mask8x16(
self.slide_within_blocks_mask8x16::<SHIFT>(a0, b0),
self.slide_within_blocks_mask8x16::<SHIFT>(a1, b1),
)
}
#[inline(always)]
fn and_mask8x32(self, a: mask8x32<Self>, b: mask8x32<Self>) -> mask8x32<Self> {
let (a0, a1) = self.split_mask8x32(a);
let (b0, b1) = self.split_mask8x32(b);
self.combine_mask8x16(self.and_mask8x16(a0, b0), self.and_mask8x16(a1, b1))
}
#[inline(always)]
fn or_mask8x32(self, a: mask8x32<Self>, b: mask8x32<Self>) -> mask8x32<Self> {
let (a0, a1) = self.split_mask8x32(a);
let (b0, b1) = self.split_mask8x32(b);
self.combine_mask8x16(self.or_mask8x16(a0, b0), self.or_mask8x16(a1, b1))
}
#[inline(always)]
fn xor_mask8x32(self, a: mask8x32<Self>, b: mask8x32<Self>) -> mask8x32<Self> {
let (a0, a1) = self.split_mask8x32(a);
let (b0, b1) = self.split_mask8x32(b);
self.combine_mask8x16(self.xor_mask8x16(a0, b0), self.xor_mask8x16(a1, b1))
}
#[inline(always)]
fn not_mask8x32(self, a: mask8x32<Self>) -> mask8x32<Self> {
let (a0, a1) = self.split_mask8x32(a);
self.combine_mask8x16(self.not_mask8x16(a0), self.not_mask8x16(a1))
}
#[inline(always)]
fn select_mask8x32(
self,
a: mask8x32<Self>,
b: mask8x32<Self>,
c: mask8x32<Self>,
) -> mask8x32<Self> {
let (a0, a1) = self.split_mask8x32(a);
let (b0, b1) = self.split_mask8x32(b);
let (c0, c1) = self.split_mask8x32(c);
self.combine_mask8x16(
self.select_mask8x16(a0, b0, c0),
self.select_mask8x16(a1, b1, c1),
)
}
#[inline(always)]
fn simd_eq_mask8x32(self, a: mask8x32<Self>, b: mask8x32<Self>) -> mask8x32<Self> {
let (a0, a1) = self.split_mask8x32(a);
let (b0, b1) = self.split_mask8x32(b);
self.combine_mask8x16(self.simd_eq_mask8x16(a0, b0), self.simd_eq_mask8x16(a1, b1))
}
#[inline(always)]
fn any_true_mask8x32(self, a: mask8x32<Self>) -> bool {
let (a0, a1) = self.split_mask8x32(a);
self.any_true_mask8x16(a0) || self.any_true_mask8x16(a1)
}
#[inline(always)]
fn all_true_mask8x32(self, a: mask8x32<Self>) -> bool {
let (a0, a1) = self.split_mask8x32(a);
self.all_true_mask8x16(a0) && self.all_true_mask8x16(a1)
}
#[inline(always)]
fn any_false_mask8x32(self, a: mask8x32<Self>) -> bool {
let (a0, a1) = self.split_mask8x32(a);
self.any_false_mask8x16(a0) || self.any_false_mask8x16(a1)
}
#[inline(always)]
fn all_false_mask8x32(self, a: mask8x32<Self>) -> bool {
let (a0, a1) = self.split_mask8x32(a);
self.all_false_mask8x16(a0) && self.all_false_mask8x16(a1)
}
#[inline(always)]
fn combine_mask8x32(self, a: mask8x32<Self>, b: mask8x32<Self>) -> mask8x64<Self> {
mask8x64 {
val: crate::support::Aligned512(int8x16x4_t(
a.val.0.0, a.val.0.1, b.val.0.0, b.val.0.1,
)),
simd: self,
}
}
#[inline(always)]
fn split_mask8x32(self, a: mask8x32<Self>) -> (mask8x16<Self>, mask8x16<Self>) {
(
mask8x16 {
val: crate::support::Aligned128(a.val.0.0),
simd: self,
},
mask8x16 {
val: crate::support::Aligned128(a.val.0.1),
simd: self,
},
)
}
#[inline(always)]
fn splat_i16x16(self, val: i16) -> i16x16<Self> {
let half = self.splat_i16x8(val);
self.combine_i16x8(half, half)
}
#[inline(always)]
fn load_array_i16x16(self, val: [i16; 16usize]) -> i16x16<Self> {
i16x16 {
val: unsafe { core::mem::transmute_copy(&val) },
simd: self,
}
}
#[inline(always)]
fn load_array_ref_i16x16(self, val: &[i16; 16usize]) -> i16x16<Self> {
i16x16 {
val: unsafe { core::mem::transmute_copy(val) },
simd: self,
}
}
#[inline(always)]
fn as_array_i16x16(self, a: i16x16<Self>) -> [i16; 16usize] {
unsafe { core::mem::transmute::<int16x8x2_t, [i16; 16usize]>(a.val.0) }
}
#[inline(always)]
fn as_array_ref_i16x16(self, a: &i16x16<Self>) -> &[i16; 16usize] {
unsafe { core::mem::transmute::<&int16x8x2_t, &[i16; 16usize]>(&a.val.0) }
}
#[inline(always)]
fn as_array_mut_i16x16(self, a: &mut i16x16<Self>) -> &mut [i16; 16usize] {
unsafe { core::mem::transmute::<&mut int16x8x2_t, &mut [i16; 16usize]>(&mut a.val.0) }
}
#[inline(always)]
fn store_array_i16x16(self, a: i16x16<Self>, dest: &mut [i16; 16usize]) -> () {
unsafe {
core::ptr::copy_nonoverlapping(
(&raw const a.val.0) as *const i16,
dest.as_mut_ptr(),
16usize,
);
}
}
#[inline(always)]
fn cvt_from_bytes_i16x16(self, a: u8x32<Self>) -> i16x16<Self> {
unsafe {
i16x16 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_i16x16(self, a: i16x16<Self>) -> u8x32<Self> {
unsafe {
u8x32 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn slide_i16x16<const SHIFT: usize>(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
if SHIFT >= 16usize {
return b;
}
let result = unsafe {
let a_bytes = self.cvt_to_bytes_i16x16(a).val.0;
let b_bytes = self.cvt_to_bytes_i16x16(b).val.0;
let a_blocks = [a_bytes.0, a_bytes.1];
let b_blocks = [b_bytes.0, b_bytes.1];
let shift_bytes = SHIFT * 2usize;
uint8x16x2_t(
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
0,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
1,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
)
};
self.cvt_from_bytes_i16x16(u8x32 {
val: crate::support::Aligned256(result),
simd: self,
})
}
#[inline(always)]
fn slide_within_blocks_i16x16<const SHIFT: usize>(
self,
a: i16x16<Self>,
b: i16x16<Self>,
) -> i16x16<Self> {
let (a0, a1) = self.split_i16x16(a);
let (b0, b1) = self.split_i16x16(b);
self.combine_i16x8(
self.slide_within_blocks_i16x8::<SHIFT>(a0, b0),
self.slide_within_blocks_i16x8::<SHIFT>(a1, b1),
)
}
#[inline(always)]
fn add_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
let (a0, a1) = self.split_i16x16(a);
let (b0, b1) = self.split_i16x16(b);
self.combine_i16x8(self.add_i16x8(a0, b0), self.add_i16x8(a1, b1))
}
#[inline(always)]
fn sub_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
let (a0, a1) = self.split_i16x16(a);
let (b0, b1) = self.split_i16x16(b);
self.combine_i16x8(self.sub_i16x8(a0, b0), self.sub_i16x8(a1, b1))
}
#[inline(always)]
fn mul_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
let (a0, a1) = self.split_i16x16(a);
let (b0, b1) = self.split_i16x16(b);
self.combine_i16x8(self.mul_i16x8(a0, b0), self.mul_i16x8(a1, b1))
}
#[inline(always)]
fn and_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
let (a0, a1) = self.split_i16x16(a);
let (b0, b1) = self.split_i16x16(b);
self.combine_i16x8(self.and_i16x8(a0, b0), self.and_i16x8(a1, b1))
}
#[inline(always)]
fn or_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
let (a0, a1) = self.split_i16x16(a);
let (b0, b1) = self.split_i16x16(b);
self.combine_i16x8(self.or_i16x8(a0, b0), self.or_i16x8(a1, b1))
}
#[inline(always)]
fn xor_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
let (a0, a1) = self.split_i16x16(a);
let (b0, b1) = self.split_i16x16(b);
self.combine_i16x8(self.xor_i16x8(a0, b0), self.xor_i16x8(a1, b1))
}
#[inline(always)]
fn not_i16x16(self, a: i16x16<Self>) -> i16x16<Self> {
let (a0, a1) = self.split_i16x16(a);
self.combine_i16x8(self.not_i16x8(a0), self.not_i16x8(a1))
}
#[inline(always)]
fn shl_i16x16(self, a: i16x16<Self>, shift: u32) -> i16x16<Self> {
let (a0, a1) = self.split_i16x16(a);
self.combine_i16x8(self.shl_i16x8(a0, shift), self.shl_i16x8(a1, shift))
}
#[inline(always)]
fn shlv_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
let (a0, a1) = self.split_i16x16(a);
let (b0, b1) = self.split_i16x16(b);
self.combine_i16x8(self.shlv_i16x8(a0, b0), self.shlv_i16x8(a1, b1))
}
#[inline(always)]
fn shr_i16x16(self, a: i16x16<Self>, shift: u32) -> i16x16<Self> {
let (a0, a1) = self.split_i16x16(a);
self.combine_i16x8(self.shr_i16x8(a0, shift), self.shr_i16x8(a1, shift))
}
#[inline(always)]
fn shrv_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
let (a0, a1) = self.split_i16x16(a);
let (b0, b1) = self.split_i16x16(b);
self.combine_i16x8(self.shrv_i16x8(a0, b0), self.shrv_i16x8(a1, b1))
}
#[inline(always)]
fn simd_eq_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> mask16x16<Self> {
let (a0, a1) = self.split_i16x16(a);
let (b0, b1) = self.split_i16x16(b);
self.combine_mask16x8(self.simd_eq_i16x8(a0, b0), self.simd_eq_i16x8(a1, b1))
}
#[inline(always)]
fn simd_lt_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> mask16x16<Self> {
let (a0, a1) = self.split_i16x16(a);
let (b0, b1) = self.split_i16x16(b);
self.combine_mask16x8(self.simd_lt_i16x8(a0, b0), self.simd_lt_i16x8(a1, b1))
}
#[inline(always)]
fn simd_le_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> mask16x16<Self> {
let (a0, a1) = self.split_i16x16(a);
let (b0, b1) = self.split_i16x16(b);
self.combine_mask16x8(self.simd_le_i16x8(a0, b0), self.simd_le_i16x8(a1, b1))
}
#[inline(always)]
fn simd_ge_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> mask16x16<Self> {
let (a0, a1) = self.split_i16x16(a);
let (b0, b1) = self.split_i16x16(b);
self.combine_mask16x8(self.simd_ge_i16x8(a0, b0), self.simd_ge_i16x8(a1, b1))
}
#[inline(always)]
fn simd_gt_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> mask16x16<Self> {
let (a0, a1) = self.split_i16x16(a);
let (b0, b1) = self.split_i16x16(b);
self.combine_mask16x8(self.simd_gt_i16x8(a0, b0), self.simd_gt_i16x8(a1, b1))
}
#[inline(always)]
fn zip_low_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
let (a0, _) = self.split_i16x16(a);
let (b0, _) = self.split_i16x16(b);
self.combine_i16x8(self.zip_low_i16x8(a0, b0), self.zip_high_i16x8(a0, b0))
}
#[inline(always)]
fn zip_high_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
let (_, a1) = self.split_i16x16(a);
let (_, b1) = self.split_i16x16(b);
self.combine_i16x8(self.zip_low_i16x8(a1, b1), self.zip_high_i16x8(a1, b1))
}
#[inline(always)]
fn unzip_low_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
let (a0, a1) = self.split_i16x16(a);
let (b0, b1) = self.split_i16x16(b);
self.combine_i16x8(self.unzip_low_i16x8(a0, a1), self.unzip_low_i16x8(b0, b1))
}
#[inline(always)]
fn unzip_high_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
let (a0, a1) = self.split_i16x16(a);
let (b0, b1) = self.split_i16x16(b);
self.combine_i16x8(self.unzip_high_i16x8(a0, a1), self.unzip_high_i16x8(b0, b1))
}
#[inline(always)]
fn select_i16x16(self, a: mask16x16<Self>, b: i16x16<Self>, c: i16x16<Self>) -> i16x16<Self> {
let (a0, a1) = self.split_mask16x16(a);
let (b0, b1) = self.split_i16x16(b);
let (c0, c1) = self.split_i16x16(c);
self.combine_i16x8(self.select_i16x8(a0, b0, c0), self.select_i16x8(a1, b1, c1))
}
#[inline(always)]
fn min_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
let (a0, a1) = self.split_i16x16(a);
let (b0, b1) = self.split_i16x16(b);
self.combine_i16x8(self.min_i16x8(a0, b0), self.min_i16x8(a1, b1))
}
#[inline(always)]
fn max_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x16<Self> {
let (a0, a1) = self.split_i16x16(a);
let (b0, b1) = self.split_i16x16(b);
self.combine_i16x8(self.max_i16x8(a0, b0), self.max_i16x8(a1, b1))
}
#[inline(always)]
fn combine_i16x16(self, a: i16x16<Self>, b: i16x16<Self>) -> i16x32<Self> {
i16x32 {
val: crate::support::Aligned512(int16x8x4_t(
a.val.0.0, a.val.0.1, b.val.0.0, b.val.0.1,
)),
simd: self,
}
}
#[inline(always)]
fn split_i16x16(self, a: i16x16<Self>) -> (i16x8<Self>, i16x8<Self>) {
(
i16x8 {
val: crate::support::Aligned128(a.val.0.0),
simd: self,
},
i16x8 {
val: crate::support::Aligned128(a.val.0.1),
simd: self,
},
)
}
#[inline(always)]
fn neg_i16x16(self, a: i16x16<Self>) -> i16x16<Self> {
let (a0, a1) = self.split_i16x16(a);
self.combine_i16x8(self.neg_i16x8(a0), self.neg_i16x8(a1))
}
#[inline(always)]
fn reinterpret_u8_i16x16(self, a: i16x16<Self>) -> u8x32<Self> {
let (a0, a1) = self.split_i16x16(a);
self.combine_u8x16(self.reinterpret_u8_i16x8(a0), self.reinterpret_u8_i16x8(a1))
}
#[inline(always)]
fn reinterpret_u32_i16x16(self, a: i16x16<Self>) -> u32x8<Self> {
let (a0, a1) = self.split_i16x16(a);
self.combine_u32x4(
self.reinterpret_u32_i16x8(a0),
self.reinterpret_u32_i16x8(a1),
)
}
#[inline(always)]
fn splat_u16x16(self, val: u16) -> u16x16<Self> {
let half = self.splat_u16x8(val);
self.combine_u16x8(half, half)
}
#[inline(always)]
fn load_array_u16x16(self, val: [u16; 16usize]) -> u16x16<Self> {
u16x16 {
val: unsafe { core::mem::transmute_copy(&val) },
simd: self,
}
}
#[inline(always)]
fn load_array_ref_u16x16(self, val: &[u16; 16usize]) -> u16x16<Self> {
u16x16 {
val: unsafe { core::mem::transmute_copy(val) },
simd: self,
}
}
#[inline(always)]
fn as_array_u16x16(self, a: u16x16<Self>) -> [u16; 16usize] {
unsafe { core::mem::transmute::<uint16x8x2_t, [u16; 16usize]>(a.val.0) }
}
#[inline(always)]
fn as_array_ref_u16x16(self, a: &u16x16<Self>) -> &[u16; 16usize] {
unsafe { core::mem::transmute::<&uint16x8x2_t, &[u16; 16usize]>(&a.val.0) }
}
#[inline(always)]
fn as_array_mut_u16x16(self, a: &mut u16x16<Self>) -> &mut [u16; 16usize] {
unsafe { core::mem::transmute::<&mut uint16x8x2_t, &mut [u16; 16usize]>(&mut a.val.0) }
}
#[inline(always)]
fn store_array_u16x16(self, a: u16x16<Self>, dest: &mut [u16; 16usize]) -> () {
unsafe {
core::ptr::copy_nonoverlapping(
(&raw const a.val.0) as *const u16,
dest.as_mut_ptr(),
16usize,
);
}
}
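// Byte-view conversions are pure reinterpretations: the register tuple has the
// same size and layout as its byte-vector counterpart, so a transmute suffices.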
#[inline(always)]
fn cvt_from_bytes_u16x16(self, a: u8x32<Self>) -> u16x16<Self> {
unsafe {
u16x16 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_u16x16(self, a: u16x16<Self>) -> u8x32<Self> {
unsafe {
u8x32 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
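// Cross-block slide: both operands are viewed as 16-byte blocks;
// `cross_block_slide_blocks_at` selects the pair of source blocks feeding each
// output block, and `dyn_vext_128` extracts 16 bytes from that pair at the
// remaining in-block byte offset.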
#[inline(always)]
fn slide_u16x16<const SHIFT: usize>(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
if SHIFT >= 16usize {
return b;
}
let result = unsafe {
let a_bytes = self.cvt_to_bytes_u16x16(a).val.0;
let b_bytes = self.cvt_to_bytes_u16x16(b).val.0;
let a_blocks = [a_bytes.0, a_bytes.1];
let b_blocks = [b_bytes.0, b_bytes.1];
let shift_bytes = SHIFT * 2usize;
uint8x16x2_t(
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
0,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
1,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
)
};
self.cvt_from_bytes_u16x16(u8x32 {
val: crate::support::Aligned256(result),
simd: self,
})
}
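// The within-blocks variant slides each 128-bit half independently rather than
// across the whole 256-bit vector.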
#[inline(always)]
fn slide_within_blocks_u16x16<const SHIFT: usize>(
self,
a: u16x16<Self>,
b: u16x16<Self>,
) -> u16x16<Self> {
let (a0, a1) = self.split_u16x16(a);
let (b0, b1) = self.split_u16x16(b);
self.combine_u16x8(
self.slide_within_blocks_u16x8::<SHIFT>(a0, b0),
self.slide_within_blocks_u16x8::<SHIFT>(a1, b1),
)
}
#[inline(always)]
fn add_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
let (a0, a1) = self.split_u16x16(a);
let (b0, b1) = self.split_u16x16(b);
self.combine_u16x8(self.add_u16x8(a0, b0), self.add_u16x8(a1, b1))
}
#[inline(always)]
fn sub_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
let (a0, a1) = self.split_u16x16(a);
let (b0, b1) = self.split_u16x16(b);
self.combine_u16x8(self.sub_u16x8(a0, b0), self.sub_u16x8(a1, b1))
}
#[inline(always)]
fn mul_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
let (a0, a1) = self.split_u16x16(a);
let (b0, b1) = self.split_u16x16(b);
self.combine_u16x8(self.mul_u16x8(a0, b0), self.mul_u16x8(a1, b1))
}
#[inline(always)]
fn and_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
let (a0, a1) = self.split_u16x16(a);
let (b0, b1) = self.split_u16x16(b);
self.combine_u16x8(self.and_u16x8(a0, b0), self.and_u16x8(a1, b1))
}
#[inline(always)]
fn or_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
let (a0, a1) = self.split_u16x16(a);
let (b0, b1) = self.split_u16x16(b);
self.combine_u16x8(self.or_u16x8(a0, b0), self.or_u16x8(a1, b1))
}
#[inline(always)]
fn xor_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
let (a0, a1) = self.split_u16x16(a);
let (b0, b1) = self.split_u16x16(b);
self.combine_u16x8(self.xor_u16x8(a0, b0), self.xor_u16x8(a1, b1))
}
#[inline(always)]
fn not_u16x16(self, a: u16x16<Self>) -> u16x16<Self> {
let (a0, a1) = self.split_u16x16(a);
self.combine_u16x8(self.not_u16x8(a0), self.not_u16x8(a1))
}
#[inline(always)]
fn shl_u16x16(self, a: u16x16<Self>, shift: u32) -> u16x16<Self> {
let (a0, a1) = self.split_u16x16(a);
self.combine_u16x8(self.shl_u16x8(a0, shift), self.shl_u16x8(a1, shift))
}
#[inline(always)]
fn shlv_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
let (a0, a1) = self.split_u16x16(a);
let (b0, b1) = self.split_u16x16(b);
self.combine_u16x8(self.shlv_u16x8(a0, b0), self.shlv_u16x8(a1, b1))
}
#[inline(always)]
fn shr_u16x16(self, a: u16x16<Self>, shift: u32) -> u16x16<Self> {
let (a0, a1) = self.split_u16x16(a);
self.combine_u16x8(self.shr_u16x8(a0, shift), self.shr_u16x8(a1, shift))
}
#[inline(always)]
fn shrv_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
let (a0, a1) = self.split_u16x16(a);
let (b0, b1) = self.split_u16x16(b);
self.combine_u16x8(self.shrv_u16x8(a0, b0), self.shrv_u16x8(a1, b1))
}
#[inline(always)]
fn simd_eq_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> mask16x16<Self> {
let (a0, a1) = self.split_u16x16(a);
let (b0, b1) = self.split_u16x16(b);
self.combine_mask16x8(self.simd_eq_u16x8(a0, b0), self.simd_eq_u16x8(a1, b1))
}
#[inline(always)]
fn simd_lt_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> mask16x16<Self> {
let (a0, a1) = self.split_u16x16(a);
let (b0, b1) = self.split_u16x16(b);
self.combine_mask16x8(self.simd_lt_u16x8(a0, b0), self.simd_lt_u16x8(a1, b1))
}
#[inline(always)]
fn simd_le_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> mask16x16<Self> {
let (a0, a1) = self.split_u16x16(a);
let (b0, b1) = self.split_u16x16(b);
self.combine_mask16x8(self.simd_le_u16x8(a0, b0), self.simd_le_u16x8(a1, b1))
}
#[inline(always)]
fn simd_ge_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> mask16x16<Self> {
let (a0, a1) = self.split_u16x16(a);
let (b0, b1) = self.split_u16x16(b);
self.combine_mask16x8(self.simd_ge_u16x8(a0, b0), self.simd_ge_u16x8(a1, b1))
}
#[inline(always)]
fn simd_gt_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> mask16x16<Self> {
let (a0, a1) = self.split_u16x16(a);
let (b0, b1) = self.split_u16x16(b);
self.combine_mask16x8(self.simd_gt_u16x8(a0, b0), self.simd_gt_u16x8(a1, b1))
}
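// `zip_low`/`zip_high` interleave the low (resp. high) halves of `a` and `b`,
// expanding one 8-lane half of each input into a full 16-lane result.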
#[inline(always)]
fn zip_low_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
let (a0, _) = self.split_u16x16(a);
let (b0, _) = self.split_u16x16(b);
self.combine_u16x8(self.zip_low_u16x8(a0, b0), self.zip_high_u16x8(a0, b0))
}
#[inline(always)]
fn zip_high_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
let (_, a1) = self.split_u16x16(a);
let (_, b1) = self.split_u16x16(b);
self.combine_u16x8(self.zip_low_u16x8(a1, b1), self.zip_high_u16x8(a1, b1))
}
#[inline(always)]
fn unzip_low_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
let (a0, a1) = self.split_u16x16(a);
let (b0, b1) = self.split_u16x16(b);
self.combine_u16x8(self.unzip_low_u16x8(a0, a1), self.unzip_low_u16x8(b0, b1))
}
#[inline(always)]
fn unzip_high_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
let (a0, a1) = self.split_u16x16(a);
let (b0, b1) = self.split_u16x16(b);
self.combine_u16x8(self.unzip_high_u16x8(a0, a1), self.unzip_high_u16x8(b0, b1))
}
#[inline(always)]
fn select_u16x16(self, a: mask16x16<Self>, b: u16x16<Self>, c: u16x16<Self>) -> u16x16<Self> {
let (a0, a1) = self.split_mask16x16(a);
let (b0, b1) = self.split_u16x16(b);
let (c0, c1) = self.split_u16x16(c);
self.combine_u16x8(self.select_u16x8(a0, b0, c0), self.select_u16x8(a1, b1, c1))
}
#[inline(always)]
fn min_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
let (a0, a1) = self.split_u16x16(a);
let (b0, b1) = self.split_u16x16(b);
self.combine_u16x8(self.min_u16x8(a0, b0), self.min_u16x8(a1, b1))
}
#[inline(always)]
fn max_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x16<Self> {
let (a0, a1) = self.split_u16x16(a);
let (b0, b1) = self.split_u16x16(b);
self.combine_u16x8(self.max_u16x8(a0, b0), self.max_u16x8(a1, b1))
}
#[inline(always)]
fn combine_u16x16(self, a: u16x16<Self>, b: u16x16<Self>) -> u16x32<Self> {
u16x32 {
val: crate::support::Aligned512(uint16x8x4_t(
a.val.0.0, a.val.0.1, b.val.0.0, b.val.0.1,
)),
simd: self,
}
}
#[inline(always)]
fn split_u16x16(self, a: u16x16<Self>) -> (u16x8<Self>, u16x8<Self>) {
(
u16x8 {
val: crate::support::Aligned128(a.val.0.0),
simd: self,
},
u16x8 {
val: crate::support::Aligned128(a.val.0.1),
simd: self,
},
)
}
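// Narrow each 16-bit lane to 8 bits (truncating) with `vmovn_u16`, then join
// the two 8-byte halves into a single 128-bit vector.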
#[inline(always)]
fn narrow_u16x16(self, a: u16x16<Self>) -> u8x16<Self> {
unsafe {
let converted: uint16x8x2_t = a.into();
let low = vmovn_u16(converted.0);
let high = vmovn_u16(converted.1);
vcombine_u8(low, high).simd_into(self)
}
}
#[inline(always)]
fn reinterpret_u8_u16x16(self, a: u16x16<Self>) -> u8x32<Self> {
let (a0, a1) = self.split_u16x16(a);
self.combine_u8x16(self.reinterpret_u8_u16x8(a0), self.reinterpret_u8_u16x8(a1))
}
#[inline(always)]
fn reinterpret_u32_u16x16(self, a: u16x16<Self>) -> u32x8<Self> {
let (a0, a1) = self.split_u16x16(a);
self.combine_u32x4(
self.reinterpret_u32_u16x8(a0),
self.reinterpret_u32_u16x8(a1),
)
}
#[inline(always)]
fn splat_mask16x16(self, val: i16) -> mask16x16<Self> {
let half = self.splat_mask16x8(val);
self.combine_mask16x8(half, half)
}
#[inline(always)]
fn load_array_mask16x16(self, val: [i16; 16usize]) -> mask16x16<Self> {
mask16x16 {
val: unsafe { core::mem::transmute_copy(&val) },
simd: self,
}
}
#[inline(always)]
fn load_array_ref_mask16x16(self, val: &[i16; 16usize]) -> mask16x16<Self> {
mask16x16 {
val: unsafe { core::mem::transmute_copy(val) },
simd: self,
}
}
#[inline(always)]
fn as_array_mask16x16(self, a: mask16x16<Self>) -> [i16; 16usize] {
unsafe { core::mem::transmute::<int16x8x2_t, [i16; 16usize]>(a.val.0) }
}
#[inline(always)]
fn as_array_ref_mask16x16(self, a: &mask16x16<Self>) -> &[i16; 16usize] {
unsafe { core::mem::transmute::<&int16x8x2_t, &[i16; 16usize]>(&a.val.0) }
}
#[inline(always)]
fn as_array_mut_mask16x16(self, a: &mut mask16x16<Self>) -> &mut [i16; 16usize] {
unsafe { core::mem::transmute::<&mut int16x8x2_t, &mut [i16; 16usize]>(&mut a.val.0) }
}
#[inline(always)]
fn store_array_mask16x16(self, a: mask16x16<Self>, dest: &mut [i16; 16usize]) -> () {
unsafe {
core::ptr::copy_nonoverlapping(
(&raw const a.val.0) as *const i16,
dest.as_mut_ptr(),
16usize,
);
}
}
#[inline(always)]
fn cvt_from_bytes_mask16x16(self, a: u8x32<Self>) -> mask16x16<Self> {
unsafe {
mask16x16 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_mask16x16(self, a: mask16x16<Self>) -> u8x32<Self> {
unsafe {
u8x32 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn slide_mask16x16<const SHIFT: usize>(
self,
a: mask16x16<Self>,
b: mask16x16<Self>,
) -> mask16x16<Self> {
if SHIFT >= 16usize {
return b;
}
let result = unsafe {
let a_bytes = self.cvt_to_bytes_mask16x16(a).val.0;
let b_bytes = self.cvt_to_bytes_mask16x16(b).val.0;
let a_blocks = [a_bytes.0, a_bytes.1];
let b_blocks = [b_bytes.0, b_bytes.1];
let shift_bytes = SHIFT * 2usize;
uint8x16x2_t(
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
0,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
1,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
)
};
self.cvt_from_bytes_mask16x16(u8x32 {
val: crate::support::Aligned256(result),
simd: self,
})
}
#[inline(always)]
fn slide_within_blocks_mask16x16<const SHIFT: usize>(
self,
a: mask16x16<Self>,
b: mask16x16<Self>,
) -> mask16x16<Self> {
let (a0, a1) = self.split_mask16x16(a);
let (b0, b1) = self.split_mask16x16(b);
self.combine_mask16x8(
self.slide_within_blocks_mask16x8::<SHIFT>(a0, b0),
self.slide_within_blocks_mask16x8::<SHIFT>(a1, b1),
)
}
#[inline(always)]
fn and_mask16x16(self, a: mask16x16<Self>, b: mask16x16<Self>) -> mask16x16<Self> {
let (a0, a1) = self.split_mask16x16(a);
let (b0, b1) = self.split_mask16x16(b);
self.combine_mask16x8(self.and_mask16x8(a0, b0), self.and_mask16x8(a1, b1))
}
#[inline(always)]
fn or_mask16x16(self, a: mask16x16<Self>, b: mask16x16<Self>) -> mask16x16<Self> {
let (a0, a1) = self.split_mask16x16(a);
let (b0, b1) = self.split_mask16x16(b);
self.combine_mask16x8(self.or_mask16x8(a0, b0), self.or_mask16x8(a1, b1))
}
#[inline(always)]
fn xor_mask16x16(self, a: mask16x16<Self>, b: mask16x16<Self>) -> mask16x16<Self> {
let (a0, a1) = self.split_mask16x16(a);
let (b0, b1) = self.split_mask16x16(b);
self.combine_mask16x8(self.xor_mask16x8(a0, b0), self.xor_mask16x8(a1, b1))
}
#[inline(always)]
fn not_mask16x16(self, a: mask16x16<Self>) -> mask16x16<Self> {
let (a0, a1) = self.split_mask16x16(a);
self.combine_mask16x8(self.not_mask16x8(a0), self.not_mask16x8(a1))
}
#[inline(always)]
fn select_mask16x16(
self,
a: mask16x16<Self>,
b: mask16x16<Self>,
c: mask16x16<Self>,
) -> mask16x16<Self> {
let (a0, a1) = self.split_mask16x16(a);
let (b0, b1) = self.split_mask16x16(b);
let (c0, c1) = self.split_mask16x16(c);
self.combine_mask16x8(
self.select_mask16x8(a0, b0, c0),
self.select_mask16x8(a1, b1, c1),
)
}
#[inline(always)]
fn simd_eq_mask16x16(self, a: mask16x16<Self>, b: mask16x16<Self>) -> mask16x16<Self> {
let (a0, a1) = self.split_mask16x16(a);
let (b0, b1) = self.split_mask16x16(b);
self.combine_mask16x8(self.simd_eq_mask16x8(a0, b0), self.simd_eq_mask16x8(a1, b1))
}
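// Mask reductions are evaluated per 128-bit half and combined with
// short-circuiting boolean logic.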
#[inline(always)]
fn any_true_mask16x16(self, a: mask16x16<Self>) -> bool {
let (a0, a1) = self.split_mask16x16(a);
self.any_true_mask16x8(a0) || self.any_true_mask16x8(a1)
}
#[inline(always)]
fn all_true_mask16x16(self, a: mask16x16<Self>) -> bool {
let (a0, a1) = self.split_mask16x16(a);
self.all_true_mask16x8(a0) && self.all_true_mask16x8(a1)
}
#[inline(always)]
fn any_false_mask16x16(self, a: mask16x16<Self>) -> bool {
let (a0, a1) = self.split_mask16x16(a);
self.any_false_mask16x8(a0) || self.any_false_mask16x8(a1)
}
#[inline(always)]
fn all_false_mask16x16(self, a: mask16x16<Self>) -> bool {
let (a0, a1) = self.split_mask16x16(a);
self.all_false_mask16x8(a0) && self.all_false_mask16x8(a1)
}
#[inline(always)]
fn combine_mask16x16(self, a: mask16x16<Self>, b: mask16x16<Self>) -> mask16x32<Self> {
mask16x32 {
val: crate::support::Aligned512(int16x8x4_t(
a.val.0.0, a.val.0.1, b.val.0.0, b.val.0.1,
)),
simd: self,
}
}
#[inline(always)]
fn split_mask16x16(self, a: mask16x16<Self>) -> (mask16x8<Self>, mask16x8<Self>) {
(
mask16x8 {
val: crate::support::Aligned128(a.val.0.0),
simd: self,
},
mask16x8 {
val: crate::support::Aligned128(a.val.0.1),
simd: self,
},
)
}
#[inline(always)]
fn splat_i32x8(self, val: i32) -> i32x8<Self> {
let half = self.splat_i32x4(val);
self.combine_i32x4(half, half)
}
#[inline(always)]
fn load_array_i32x8(self, val: [i32; 8usize]) -> i32x8<Self> {
i32x8 {
val: unsafe { core::mem::transmute_copy(&val) },
simd: self,
}
}
#[inline(always)]
fn load_array_ref_i32x8(self, val: &[i32; 8usize]) -> i32x8<Self> {
i32x8 {
val: unsafe { core::mem::transmute_copy(val) },
simd: self,
}
}
#[inline(always)]
fn as_array_i32x8(self, a: i32x8<Self>) -> [i32; 8usize] {
unsafe { core::mem::transmute::<int32x4x2_t, [i32; 8usize]>(a.val.0) }
}
#[inline(always)]
fn as_array_ref_i32x8(self, a: &i32x8<Self>) -> &[i32; 8usize] {
unsafe { core::mem::transmute::<&int32x4x2_t, &[i32; 8usize]>(&a.val.0) }
}
#[inline(always)]
fn as_array_mut_i32x8(self, a: &mut i32x8<Self>) -> &mut [i32; 8usize] {
unsafe { core::mem::transmute::<&mut int32x4x2_t, &mut [i32; 8usize]>(&mut a.val.0) }
}
#[inline(always)]
fn store_array_i32x8(self, a: i32x8<Self>, dest: &mut [i32; 8usize]) -> () {
unsafe {
core::ptr::copy_nonoverlapping(
(&raw const a.val.0) as *const i32,
dest.as_mut_ptr(),
8usize,
);
}
}
#[inline(always)]
fn cvt_from_bytes_i32x8(self, a: u8x32<Self>) -> i32x8<Self> {
unsafe {
i32x8 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_i32x8(self, a: i32x8<Self>) -> u8x32<Self> {
unsafe {
u8x32 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn slide_i32x8<const SHIFT: usize>(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
if SHIFT >= 8usize {
return b;
}
let result = unsafe {
let a_bytes = self.cvt_to_bytes_i32x8(a).val.0;
let b_bytes = self.cvt_to_bytes_i32x8(b).val.0;
let a_blocks = [a_bytes.0, a_bytes.1];
let b_blocks = [b_bytes.0, b_bytes.1];
let shift_bytes = SHIFT * 4usize;
uint8x16x2_t(
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
0,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
1,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
)
};
self.cvt_from_bytes_i32x8(u8x32 {
val: crate::support::Aligned256(result),
simd: self,
})
}
#[inline(always)]
fn slide_within_blocks_i32x8<const SHIFT: usize>(
self,
a: i32x8<Self>,
b: i32x8<Self>,
) -> i32x8<Self> {
let (a0, a1) = self.split_i32x8(a);
let (b0, b1) = self.split_i32x8(b);
self.combine_i32x4(
self.slide_within_blocks_i32x4::<SHIFT>(a0, b0),
self.slide_within_blocks_i32x4::<SHIFT>(a1, b1),
)
}
#[inline(always)]
fn add_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
let (a0, a1) = self.split_i32x8(a);
let (b0, b1) = self.split_i32x8(b);
self.combine_i32x4(self.add_i32x4(a0, b0), self.add_i32x4(a1, b1))
}
#[inline(always)]
fn sub_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
let (a0, a1) = self.split_i32x8(a);
let (b0, b1) = self.split_i32x8(b);
self.combine_i32x4(self.sub_i32x4(a0, b0), self.sub_i32x4(a1, b1))
}
#[inline(always)]
fn mul_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
let (a0, a1) = self.split_i32x8(a);
let (b0, b1) = self.split_i32x8(b);
self.combine_i32x4(self.mul_i32x4(a0, b0), self.mul_i32x4(a1, b1))
}
#[inline(always)]
fn and_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
let (a0, a1) = self.split_i32x8(a);
let (b0, b1) = self.split_i32x8(b);
self.combine_i32x4(self.and_i32x4(a0, b0), self.and_i32x4(a1, b1))
}
#[inline(always)]
fn or_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
let (a0, a1) = self.split_i32x8(a);
let (b0, b1) = self.split_i32x8(b);
self.combine_i32x4(self.or_i32x4(a0, b0), self.or_i32x4(a1, b1))
}
#[inline(always)]
fn xor_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
let (a0, a1) = self.split_i32x8(a);
let (b0, b1) = self.split_i32x8(b);
self.combine_i32x4(self.xor_i32x4(a0, b0), self.xor_i32x4(a1, b1))
}
#[inline(always)]
fn not_i32x8(self, a: i32x8<Self>) -> i32x8<Self> {
let (a0, a1) = self.split_i32x8(a);
self.combine_i32x4(self.not_i32x4(a0), self.not_i32x4(a1))
}
#[inline(always)]
fn shl_i32x8(self, a: i32x8<Self>, shift: u32) -> i32x8<Self> {
let (a0, a1) = self.split_i32x8(a);
self.combine_i32x4(self.shl_i32x4(a0, shift), self.shl_i32x4(a1, shift))
}
#[inline(always)]
fn shlv_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
let (a0, a1) = self.split_i32x8(a);
let (b0, b1) = self.split_i32x8(b);
self.combine_i32x4(self.shlv_i32x4(a0, b0), self.shlv_i32x4(a1, b1))
}
#[inline(always)]
fn shr_i32x8(self, a: i32x8<Self>, shift: u32) -> i32x8<Self> {
let (a0, a1) = self.split_i32x8(a);
self.combine_i32x4(self.shr_i32x4(a0, shift), self.shr_i32x4(a1, shift))
}
#[inline(always)]
fn shrv_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
let (a0, a1) = self.split_i32x8(a);
let (b0, b1) = self.split_i32x8(b);
self.combine_i32x4(self.shrv_i32x4(a0, b0), self.shrv_i32x4(a1, b1))
}
#[inline(always)]
fn simd_eq_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> mask32x8<Self> {
let (a0, a1) = self.split_i32x8(a);
let (b0, b1) = self.split_i32x8(b);
self.combine_mask32x4(self.simd_eq_i32x4(a0, b0), self.simd_eq_i32x4(a1, b1))
}
#[inline(always)]
fn simd_lt_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> mask32x8<Self> {
let (a0, a1) = self.split_i32x8(a);
let (b0, b1) = self.split_i32x8(b);
self.combine_mask32x4(self.simd_lt_i32x4(a0, b0), self.simd_lt_i32x4(a1, b1))
}
#[inline(always)]
fn simd_le_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> mask32x8<Self> {
let (a0, a1) = self.split_i32x8(a);
let (b0, b1) = self.split_i32x8(b);
self.combine_mask32x4(self.simd_le_i32x4(a0, b0), self.simd_le_i32x4(a1, b1))
}
#[inline(always)]
fn simd_ge_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> mask32x8<Self> {
let (a0, a1) = self.split_i32x8(a);
let (b0, b1) = self.split_i32x8(b);
self.combine_mask32x4(self.simd_ge_i32x4(a0, b0), self.simd_ge_i32x4(a1, b1))
}
#[inline(always)]
fn simd_gt_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> mask32x8<Self> {
let (a0, a1) = self.split_i32x8(a);
let (b0, b1) = self.split_i32x8(b);
self.combine_mask32x4(self.simd_gt_i32x4(a0, b0), self.simd_gt_i32x4(a1, b1))
}
#[inline(always)]
fn zip_low_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
let (a0, _) = self.split_i32x8(a);
let (b0, _) = self.split_i32x8(b);
self.combine_i32x4(self.zip_low_i32x4(a0, b0), self.zip_high_i32x4(a0, b0))
}
#[inline(always)]
fn zip_high_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
let (_, a1) = self.split_i32x8(a);
let (_, b1) = self.split_i32x8(b);
self.combine_i32x4(self.zip_low_i32x4(a1, b1), self.zip_high_i32x4(a1, b1))
}
#[inline(always)]
fn unzip_low_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
let (a0, a1) = self.split_i32x8(a);
let (b0, b1) = self.split_i32x8(b);
self.combine_i32x4(self.unzip_low_i32x4(a0, a1), self.unzip_low_i32x4(b0, b1))
}
#[inline(always)]
fn unzip_high_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
let (a0, a1) = self.split_i32x8(a);
let (b0, b1) = self.split_i32x8(b);
self.combine_i32x4(self.unzip_high_i32x4(a0, a1), self.unzip_high_i32x4(b0, b1))
}
#[inline(always)]
fn select_i32x8(self, a: mask32x8<Self>, b: i32x8<Self>, c: i32x8<Self>) -> i32x8<Self> {
let (a0, a1) = self.split_mask32x8(a);
let (b0, b1) = self.split_i32x8(b);
let (c0, c1) = self.split_i32x8(c);
self.combine_i32x4(self.select_i32x4(a0, b0, c0), self.select_i32x4(a1, b1, c1))
}
#[inline(always)]
fn min_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
let (a0, a1) = self.split_i32x8(a);
let (b0, b1) = self.split_i32x8(b);
self.combine_i32x4(self.min_i32x4(a0, b0), self.min_i32x4(a1, b1))
}
#[inline(always)]
fn max_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x8<Self> {
let (a0, a1) = self.split_i32x8(a);
let (b0, b1) = self.split_i32x8(b);
self.combine_i32x4(self.max_i32x4(a0, b0), self.max_i32x4(a1, b1))
}
#[inline(always)]
fn combine_i32x8(self, a: i32x8<Self>, b: i32x8<Self>) -> i32x16<Self> {
i32x16 {
val: crate::support::Aligned512(int32x4x4_t(
a.val.0.0, a.val.0.1, b.val.0.0, b.val.0.1,
)),
simd: self,
}
}
#[inline(always)]
fn split_i32x8(self, a: i32x8<Self>) -> (i32x4<Self>, i32x4<Self>) {
(
i32x4 {
val: crate::support::Aligned128(a.val.0.0),
simd: self,
},
i32x4 {
val: crate::support::Aligned128(a.val.0.1),
simd: self,
},
)
}
#[inline(always)]
fn neg_i32x8(self, a: i32x8<Self>) -> i32x8<Self> {
let (a0, a1) = self.split_i32x8(a);
self.combine_i32x4(self.neg_i32x4(a0), self.neg_i32x4(a1))
}
#[inline(always)]
fn reinterpret_u8_i32x8(self, a: i32x8<Self>) -> u8x32<Self> {
let (a0, a1) = self.split_i32x8(a);
self.combine_u8x16(self.reinterpret_u8_i32x4(a0), self.reinterpret_u8_i32x4(a1))
}
#[inline(always)]
fn reinterpret_u32_i32x8(self, a: i32x8<Self>) -> u32x8<Self> {
let (a0, a1) = self.split_i32x8(a);
self.combine_u32x4(
self.reinterpret_u32_i32x4(a0),
self.reinterpret_u32_i32x4(a1),
)
}
#[inline(always)]
fn cvt_f32_i32x8(self, a: i32x8<Self>) -> f32x8<Self> {
let (a0, a1) = self.split_i32x8(a);
self.combine_f32x4(self.cvt_f32_i32x4(a0), self.cvt_f32_i32x4(a1))
}
#[inline(always)]
fn splat_u32x8(self, val: u32) -> u32x8<Self> {
let half = self.splat_u32x4(val);
self.combine_u32x4(half, half)
}
#[inline(always)]
fn load_array_u32x8(self, val: [u32; 8usize]) -> u32x8<Self> {
u32x8 {
val: unsafe { core::mem::transmute_copy(&val) },
simd: self,
}
}
#[inline(always)]
fn load_array_ref_u32x8(self, val: &[u32; 8usize]) -> u32x8<Self> {
u32x8 {
val: unsafe { core::mem::transmute_copy(val) },
simd: self,
}
}
#[inline(always)]
fn as_array_u32x8(self, a: u32x8<Self>) -> [u32; 8usize] {
unsafe { core::mem::transmute::<uint32x4x2_t, [u32; 8usize]>(a.val.0) }
}
#[inline(always)]
fn as_array_ref_u32x8(self, a: &u32x8<Self>) -> &[u32; 8usize] {
unsafe { core::mem::transmute::<&uint32x4x2_t, &[u32; 8usize]>(&a.val.0) }
}
#[inline(always)]
fn as_array_mut_u32x8(self, a: &mut u32x8<Self>) -> &mut [u32; 8usize] {
unsafe { core::mem::transmute::<&mut uint32x4x2_t, &mut [u32; 8usize]>(&mut a.val.0) }
}
#[inline(always)]
fn store_array_u32x8(self, a: u32x8<Self>, dest: &mut [u32; 8usize]) -> () {
unsafe {
core::ptr::copy_nonoverlapping(
(&raw const a.val.0) as *const u32,
dest.as_mut_ptr(),
8usize,
);
}
}
#[inline(always)]
fn cvt_from_bytes_u32x8(self, a: u8x32<Self>) -> u32x8<Self> {
unsafe {
u32x8 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_u32x8(self, a: u32x8<Self>) -> u8x32<Self> {
unsafe {
u8x32 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn slide_u32x8<const SHIFT: usize>(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
if SHIFT >= 8usize {
return b;
}
let result = unsafe {
let a_bytes = self.cvt_to_bytes_u32x8(a).val.0;
let b_bytes = self.cvt_to_bytes_u32x8(b).val.0;
let a_blocks = [a_bytes.0, a_bytes.1];
let b_blocks = [b_bytes.0, b_bytes.1];
let shift_bytes = SHIFT * 4usize;
uint8x16x2_t(
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
0,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
1,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
)
};
self.cvt_from_bytes_u32x8(u8x32 {
val: crate::support::Aligned256(result),
simd: self,
})
}
#[inline(always)]
fn slide_within_blocks_u32x8<const SHIFT: usize>(
self,
a: u32x8<Self>,
b: u32x8<Self>,
) -> u32x8<Self> {
let (a0, a1) = self.split_u32x8(a);
let (b0, b1) = self.split_u32x8(b);
self.combine_u32x4(
self.slide_within_blocks_u32x4::<SHIFT>(a0, b0),
self.slide_within_blocks_u32x4::<SHIFT>(a1, b1),
)
}
#[inline(always)]
fn add_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
let (a0, a1) = self.split_u32x8(a);
let (b0, b1) = self.split_u32x8(b);
self.combine_u32x4(self.add_u32x4(a0, b0), self.add_u32x4(a1, b1))
}
#[inline(always)]
fn sub_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
let (a0, a1) = self.split_u32x8(a);
let (b0, b1) = self.split_u32x8(b);
self.combine_u32x4(self.sub_u32x4(a0, b0), self.sub_u32x4(a1, b1))
}
#[inline(always)]
fn mul_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
let (a0, a1) = self.split_u32x8(a);
let (b0, b1) = self.split_u32x8(b);
self.combine_u32x4(self.mul_u32x4(a0, b0), self.mul_u32x4(a1, b1))
}
#[inline(always)]
fn and_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
let (a0, a1) = self.split_u32x8(a);
let (b0, b1) = self.split_u32x8(b);
self.combine_u32x4(self.and_u32x4(a0, b0), self.and_u32x4(a1, b1))
}
#[inline(always)]
fn or_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
let (a0, a1) = self.split_u32x8(a);
let (b0, b1) = self.split_u32x8(b);
self.combine_u32x4(self.or_u32x4(a0, b0), self.or_u32x4(a1, b1))
}
#[inline(always)]
fn xor_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
let (a0, a1) = self.split_u32x8(a);
let (b0, b1) = self.split_u32x8(b);
self.combine_u32x4(self.xor_u32x4(a0, b0), self.xor_u32x4(a1, b1))
}
#[inline(always)]
fn not_u32x8(self, a: u32x8<Self>) -> u32x8<Self> {
let (a0, a1) = self.split_u32x8(a);
self.combine_u32x4(self.not_u32x4(a0), self.not_u32x4(a1))
}
#[inline(always)]
fn shl_u32x8(self, a: u32x8<Self>, shift: u32) -> u32x8<Self> {
let (a0, a1) = self.split_u32x8(a);
self.combine_u32x4(self.shl_u32x4(a0, shift), self.shl_u32x4(a1, shift))
}
#[inline(always)]
fn shlv_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
let (a0, a1) = self.split_u32x8(a);
let (b0, b1) = self.split_u32x8(b);
self.combine_u32x4(self.shlv_u32x4(a0, b0), self.shlv_u32x4(a1, b1))
}
#[inline(always)]
fn shr_u32x8(self, a: u32x8<Self>, shift: u32) -> u32x8<Self> {
let (a0, a1) = self.split_u32x8(a);
self.combine_u32x4(self.shr_u32x4(a0, shift), self.shr_u32x4(a1, shift))
}
#[inline(always)]
fn shrv_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
let (a0, a1) = self.split_u32x8(a);
let (b0, b1) = self.split_u32x8(b);
self.combine_u32x4(self.shrv_u32x4(a0, b0), self.shrv_u32x4(a1, b1))
}
#[inline(always)]
fn simd_eq_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> mask32x8<Self> {
let (a0, a1) = self.split_u32x8(a);
let (b0, b1) = self.split_u32x8(b);
self.combine_mask32x4(self.simd_eq_u32x4(a0, b0), self.simd_eq_u32x4(a1, b1))
}
#[inline(always)]
fn simd_lt_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> mask32x8<Self> {
let (a0, a1) = self.split_u32x8(a);
let (b0, b1) = self.split_u32x8(b);
self.combine_mask32x4(self.simd_lt_u32x4(a0, b0), self.simd_lt_u32x4(a1, b1))
}
#[inline(always)]
fn simd_le_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> mask32x8<Self> {
let (a0, a1) = self.split_u32x8(a);
let (b0, b1) = self.split_u32x8(b);
self.combine_mask32x4(self.simd_le_u32x4(a0, b0), self.simd_le_u32x4(a1, b1))
}
#[inline(always)]
fn simd_ge_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> mask32x8<Self> {
let (a0, a1) = self.split_u32x8(a);
let (b0, b1) = self.split_u32x8(b);
self.combine_mask32x4(self.simd_ge_u32x4(a0, b0), self.simd_ge_u32x4(a1, b1))
}
#[inline(always)]
fn simd_gt_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> mask32x8<Self> {
let (a0, a1) = self.split_u32x8(a);
let (b0, b1) = self.split_u32x8(b);
self.combine_mask32x4(self.simd_gt_u32x4(a0, b0), self.simd_gt_u32x4(a1, b1))
}
#[inline(always)]
fn zip_low_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
let (a0, _) = self.split_u32x8(a);
let (b0, _) = self.split_u32x8(b);
self.combine_u32x4(self.zip_low_u32x4(a0, b0), self.zip_high_u32x4(a0, b0))
}
#[inline(always)]
fn zip_high_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
let (_, a1) = self.split_u32x8(a);
let (_, b1) = self.split_u32x8(b);
self.combine_u32x4(self.zip_low_u32x4(a1, b1), self.zip_high_u32x4(a1, b1))
}
#[inline(always)]
fn unzip_low_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
let (a0, a1) = self.split_u32x8(a);
let (b0, b1) = self.split_u32x8(b);
self.combine_u32x4(self.unzip_low_u32x4(a0, a1), self.unzip_low_u32x4(b0, b1))
}
#[inline(always)]
fn unzip_high_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
let (a0, a1) = self.split_u32x8(a);
let (b0, b1) = self.split_u32x8(b);
self.combine_u32x4(self.unzip_high_u32x4(a0, a1), self.unzip_high_u32x4(b0, b1))
}
#[inline(always)]
fn select_u32x8(self, a: mask32x8<Self>, b: u32x8<Self>, c: u32x8<Self>) -> u32x8<Self> {
let (a0, a1) = self.split_mask32x8(a);
let (b0, b1) = self.split_u32x8(b);
let (c0, c1) = self.split_u32x8(c);
self.combine_u32x4(self.select_u32x4(a0, b0, c0), self.select_u32x4(a1, b1, c1))
}
#[inline(always)]
fn min_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
let (a0, a1) = self.split_u32x8(a);
let (b0, b1) = self.split_u32x8(b);
self.combine_u32x4(self.min_u32x4(a0, b0), self.min_u32x4(a1, b1))
}
#[inline(always)]
fn max_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x8<Self> {
let (a0, a1) = self.split_u32x8(a);
let (b0, b1) = self.split_u32x8(b);
self.combine_u32x4(self.max_u32x4(a0, b0), self.max_u32x4(a1, b1))
}
#[inline(always)]
fn combine_u32x8(self, a: u32x8<Self>, b: u32x8<Self>) -> u32x16<Self> {
u32x16 {
val: crate::support::Aligned512(uint32x4x4_t(
a.val.0.0, a.val.0.1, b.val.0.0, b.val.0.1,
)),
simd: self,
}
}
#[inline(always)]
fn split_u32x8(self, a: u32x8<Self>) -> (u32x4<Self>, u32x4<Self>) {
(
u32x4 {
val: crate::support::Aligned128(a.val.0.0),
simd: self,
},
u32x4 {
val: crate::support::Aligned128(a.val.0.1),
simd: self,
},
)
}
#[inline(always)]
fn reinterpret_u8_u32x8(self, a: u32x8<Self>) -> u8x32<Self> {
let (a0, a1) = self.split_u32x8(a);
self.combine_u8x16(self.reinterpret_u8_u32x4(a0), self.reinterpret_u8_u32x4(a1))
}
#[inline(always)]
fn cvt_f32_u32x8(self, a: u32x8<Self>) -> f32x8<Self> {
let (a0, a1) = self.split_u32x8(a);
self.combine_f32x4(self.cvt_f32_u32x4(a0), self.cvt_f32_u32x4(a1))
}
#[inline(always)]
fn splat_mask32x8(self, val: i32) -> mask32x8<Self> {
let half = self.splat_mask32x4(val);
self.combine_mask32x4(half, half)
}
#[inline(always)]
fn load_array_mask32x8(self, val: [i32; 8usize]) -> mask32x8<Self> {
mask32x8 {
val: unsafe { core::mem::transmute_copy(&val) },
simd: self,
}
}
#[inline(always)]
fn load_array_ref_mask32x8(self, val: &[i32; 8usize]) -> mask32x8<Self> {
mask32x8 {
val: unsafe { core::mem::transmute_copy(val) },
simd: self,
}
}
#[inline(always)]
fn as_array_mask32x8(self, a: mask32x8<Self>) -> [i32; 8usize] {
unsafe { core::mem::transmute::<int32x4x2_t, [i32; 8usize]>(a.val.0) }
}
#[inline(always)]
fn as_array_ref_mask32x8(self, a: &mask32x8<Self>) -> &[i32; 8usize] {
unsafe { core::mem::transmute::<&int32x4x2_t, &[i32; 8usize]>(&a.val.0) }
}
#[inline(always)]
fn as_array_mut_mask32x8(self, a: &mut mask32x8<Self>) -> &mut [i32; 8usize] {
unsafe { core::mem::transmute::<&mut int32x4x2_t, &mut [i32; 8usize]>(&mut a.val.0) }
}
#[inline(always)]
fn store_array_mask32x8(self, a: mask32x8<Self>, dest: &mut [i32; 8usize]) -> () {
unsafe {
core::ptr::copy_nonoverlapping(
(&raw const a.val.0) as *const i32,
dest.as_mut_ptr(),
8usize,
);
}
}
#[inline(always)]
fn cvt_from_bytes_mask32x8(self, a: u8x32<Self>) -> mask32x8<Self> {
unsafe {
mask32x8 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_mask32x8(self, a: mask32x8<Self>) -> u8x32<Self> {
unsafe {
u8x32 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn slide_mask32x8<const SHIFT: usize>(
self,
a: mask32x8<Self>,
b: mask32x8<Self>,
) -> mask32x8<Self> {
if SHIFT >= 8usize {
return b;
}
let result = unsafe {
let a_bytes = self.cvt_to_bytes_mask32x8(a).val.0;
let b_bytes = self.cvt_to_bytes_mask32x8(b).val.0;
let a_blocks = [a_bytes.0, a_bytes.1];
let b_blocks = [b_bytes.0, b_bytes.1];
let shift_bytes = SHIFT * 4usize;
uint8x16x2_t(
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
0,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
1,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
)
};
self.cvt_from_bytes_mask32x8(u8x32 {
val: crate::support::Aligned256(result),
simd: self,
})
}
#[inline(always)]
fn slide_within_blocks_mask32x8<const SHIFT: usize>(
self,
a: mask32x8<Self>,
b: mask32x8<Self>,
) -> mask32x8<Self> {
let (a0, a1) = self.split_mask32x8(a);
let (b0, b1) = self.split_mask32x8(b);
self.combine_mask32x4(
self.slide_within_blocks_mask32x4::<SHIFT>(a0, b0),
self.slide_within_blocks_mask32x4::<SHIFT>(a1, b1),
)
}
#[inline(always)]
fn and_mask32x8(self, a: mask32x8<Self>, b: mask32x8<Self>) -> mask32x8<Self> {
let (a0, a1) = self.split_mask32x8(a);
let (b0, b1) = self.split_mask32x8(b);
self.combine_mask32x4(self.and_mask32x4(a0, b0), self.and_mask32x4(a1, b1))
}
#[inline(always)]
fn or_mask32x8(self, a: mask32x8<Self>, b: mask32x8<Self>) -> mask32x8<Self> {
let (a0, a1) = self.split_mask32x8(a);
let (b0, b1) = self.split_mask32x8(b);
self.combine_mask32x4(self.or_mask32x4(a0, b0), self.or_mask32x4(a1, b1))
}
#[inline(always)]
fn xor_mask32x8(self, a: mask32x8<Self>, b: mask32x8<Self>) -> mask32x8<Self> {
let (a0, a1) = self.split_mask32x8(a);
let (b0, b1) = self.split_mask32x8(b);
self.combine_mask32x4(self.xor_mask32x4(a0, b0), self.xor_mask32x4(a1, b1))
}
#[inline(always)]
fn not_mask32x8(self, a: mask32x8<Self>) -> mask32x8<Self> {
let (a0, a1) = self.split_mask32x8(a);
self.combine_mask32x4(self.not_mask32x4(a0), self.not_mask32x4(a1))
}
#[inline(always)]
fn select_mask32x8(
self,
a: mask32x8<Self>,
b: mask32x8<Self>,
c: mask32x8<Self>,
) -> mask32x8<Self> {
let (a0, a1) = self.split_mask32x8(a);
let (b0, b1) = self.split_mask32x8(b);
let (c0, c1) = self.split_mask32x8(c);
self.combine_mask32x4(
self.select_mask32x4(a0, b0, c0),
self.select_mask32x4(a1, b1, c1),
)
}
#[inline(always)]
fn simd_eq_mask32x8(self, a: mask32x8<Self>, b: mask32x8<Self>) -> mask32x8<Self> {
let (a0, a1) = self.split_mask32x8(a);
let (b0, b1) = self.split_mask32x8(b);
self.combine_mask32x4(self.simd_eq_mask32x4(a0, b0), self.simd_eq_mask32x4(a1, b1))
}
#[inline(always)]
fn any_true_mask32x8(self, a: mask32x8<Self>) -> bool {
let (a0, a1) = self.split_mask32x8(a);
self.any_true_mask32x4(a0) || self.any_true_mask32x4(a1)
}
#[inline(always)]
fn all_true_mask32x8(self, a: mask32x8<Self>) -> bool {
let (a0, a1) = self.split_mask32x8(a);
self.all_true_mask32x4(a0) && self.all_true_mask32x4(a1)
}
#[inline(always)]
fn any_false_mask32x8(self, a: mask32x8<Self>) -> bool {
let (a0, a1) = self.split_mask32x8(a);
self.any_false_mask32x4(a0) || self.any_false_mask32x4(a1)
}
#[inline(always)]
fn all_false_mask32x8(self, a: mask32x8<Self>) -> bool {
let (a0, a1) = self.split_mask32x8(a);
self.all_false_mask32x4(a0) && self.all_false_mask32x4(a1)
}
#[inline(always)]
fn combine_mask32x8(self, a: mask32x8<Self>, b: mask32x8<Self>) -> mask32x16<Self> {
mask32x16 {
val: crate::support::Aligned512(int32x4x4_t(
a.val.0.0, a.val.0.1, b.val.0.0, b.val.0.1,
)),
simd: self,
}
}
#[inline(always)]
fn split_mask32x8(self, a: mask32x8<Self>) -> (mask32x4<Self>, mask32x4<Self>) {
(
mask32x4 {
val: crate::support::Aligned128(a.val.0.0),
simd: self,
},
mask32x4 {
val: crate::support::Aligned128(a.val.0.1),
simd: self,
},
)
}
#[inline(always)]
fn splat_f64x4(self, val: f64) -> f64x4<Self> {
let half = self.splat_f64x2(val);
self.combine_f64x2(half, half)
}
#[inline(always)]
fn load_array_f64x4(self, val: [f64; 4usize]) -> f64x4<Self> {
f64x4 {
val: unsafe { core::mem::transmute_copy(&val) },
simd: self,
}
}
#[inline(always)]
fn load_array_ref_f64x4(self, val: &[f64; 4usize]) -> f64x4<Self> {
f64x4 {
val: unsafe { core::mem::transmute_copy(val) },
simd: self,
}
}
#[inline(always)]
fn as_array_f64x4(self, a: f64x4<Self>) -> [f64; 4usize] {
unsafe { core::mem::transmute::<float64x2x2_t, [f64; 4usize]>(a.val.0) }
}
#[inline(always)]
fn as_array_ref_f64x4(self, a: &f64x4<Self>) -> &[f64; 4usize] {
unsafe { core::mem::transmute::<&float64x2x2_t, &[f64; 4usize]>(&a.val.0) }
}
#[inline(always)]
fn as_array_mut_f64x4(self, a: &mut f64x4<Self>) -> &mut [f64; 4usize] {
unsafe { core::mem::transmute::<&mut float64x2x2_t, &mut [f64; 4usize]>(&mut a.val.0) }
}
#[inline(always)]
fn store_array_f64x4(self, a: f64x4<Self>, dest: &mut [f64; 4usize]) -> () {
unsafe {
core::ptr::copy_nonoverlapping(
(&raw const a.val.0) as *const f64,
dest.as_mut_ptr(),
4usize,
);
}
}
#[inline(always)]
fn cvt_from_bytes_f64x4(self, a: u8x32<Self>) -> f64x4<Self> {
unsafe {
f64x4 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_f64x4(self, a: f64x4<Self>) -> u8x32<Self> {
unsafe {
u8x32 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn slide_f64x4<const SHIFT: usize>(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
if SHIFT >= 4usize {
return b;
}
let result = unsafe {
let a_bytes = self.cvt_to_bytes_f64x4(a).val.0;
let b_bytes = self.cvt_to_bytes_f64x4(b).val.0;
let a_blocks = [a_bytes.0, a_bytes.1];
let b_blocks = [b_bytes.0, b_bytes.1];
let shift_bytes = SHIFT * 8usize;
uint8x16x2_t(
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
0,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
1,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
)
};
self.cvt_from_bytes_f64x4(u8x32 {
val: crate::support::Aligned256(result),
simd: self,
})
}
#[inline(always)]
fn slide_within_blocks_f64x4<const SHIFT: usize>(
self,
a: f64x4<Self>,
b: f64x4<Self>,
) -> f64x4<Self> {
let (a0, a1) = self.split_f64x4(a);
let (b0, b1) = self.split_f64x4(b);
self.combine_f64x2(
self.slide_within_blocks_f64x2::<SHIFT>(a0, b0),
self.slide_within_blocks_f64x2::<SHIFT>(a1, b1),
)
}
#[inline(always)]
fn abs_f64x4(self, a: f64x4<Self>) -> f64x4<Self> {
let (a0, a1) = self.split_f64x4(a);
self.combine_f64x2(self.abs_f64x2(a0), self.abs_f64x2(a1))
}
#[inline(always)]
fn neg_f64x4(self, a: f64x4<Self>) -> f64x4<Self> {
let (a0, a1) = self.split_f64x4(a);
self.combine_f64x2(self.neg_f64x2(a0), self.neg_f64x2(a1))
}
#[inline(always)]
fn sqrt_f64x4(self, a: f64x4<Self>) -> f64x4<Self> {
let (a0, a1) = self.split_f64x4(a);
self.combine_f64x2(self.sqrt_f64x2(a0), self.sqrt_f64x2(a1))
}
#[inline(always)]
fn add_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
let (a0, a1) = self.split_f64x4(a);
let (b0, b1) = self.split_f64x4(b);
self.combine_f64x2(self.add_f64x2(a0, b0), self.add_f64x2(a1, b1))
}
#[inline(always)]
fn sub_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
let (a0, a1) = self.split_f64x4(a);
let (b0, b1) = self.split_f64x4(b);
self.combine_f64x2(self.sub_f64x2(a0, b0), self.sub_f64x2(a1, b1))
}
#[inline(always)]
fn mul_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
let (a0, a1) = self.split_f64x4(a);
let (b0, b1) = self.split_f64x4(b);
self.combine_f64x2(self.mul_f64x2(a0, b0), self.mul_f64x2(a1, b1))
}
#[inline(always)]
fn div_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
let (a0, a1) = self.split_f64x4(a);
let (b0, b1) = self.split_f64x4(b);
self.combine_f64x2(self.div_f64x2(a0, b0), self.div_f64x2(a1, b1))
}
#[inline(always)]
fn copysign_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
let (a0, a1) = self.split_f64x4(a);
let (b0, b1) = self.split_f64x4(b);
self.combine_f64x2(self.copysign_f64x2(a0, b0), self.copysign_f64x2(a1, b1))
}
#[inline(always)]
fn simd_eq_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> mask64x4<Self> {
let (a0, a1) = self.split_f64x4(a);
let (b0, b1) = self.split_f64x4(b);
self.combine_mask64x2(self.simd_eq_f64x2(a0, b0), self.simd_eq_f64x2(a1, b1))
}
#[inline(always)]
fn simd_lt_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> mask64x4<Self> {
let (a0, a1) = self.split_f64x4(a);
let (b0, b1) = self.split_f64x4(b);
self.combine_mask64x2(self.simd_lt_f64x2(a0, b0), self.simd_lt_f64x2(a1, b1))
}
#[inline(always)]
fn simd_le_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> mask64x4<Self> {
let (a0, a1) = self.split_f64x4(a);
let (b0, b1) = self.split_f64x4(b);
self.combine_mask64x2(self.simd_le_f64x2(a0, b0), self.simd_le_f64x2(a1, b1))
}
#[inline(always)]
fn simd_ge_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> mask64x4<Self> {
let (a0, a1) = self.split_f64x4(a);
let (b0, b1) = self.split_f64x4(b);
self.combine_mask64x2(self.simd_ge_f64x2(a0, b0), self.simd_ge_f64x2(a1, b1))
}
#[inline(always)]
fn simd_gt_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> mask64x4<Self> {
let (a0, a1) = self.split_f64x4(a);
let (b0, b1) = self.split_f64x4(b);
self.combine_mask64x2(self.simd_gt_f64x2(a0, b0), self.simd_gt_f64x2(a1, b1))
}
#[inline(always)]
fn zip_low_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
let (a0, _) = self.split_f64x4(a);
let (b0, _) = self.split_f64x4(b);
self.combine_f64x2(self.zip_low_f64x2(a0, b0), self.zip_high_f64x2(a0, b0))
}
#[inline(always)]
fn zip_high_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
let (_, a1) = self.split_f64x4(a);
let (_, b1) = self.split_f64x4(b);
self.combine_f64x2(self.zip_low_f64x2(a1, b1), self.zip_high_f64x2(a1, b1))
}
#[inline(always)]
fn unzip_low_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
let (a0, a1) = self.split_f64x4(a);
let (b0, b1) = self.split_f64x4(b);
self.combine_f64x2(self.unzip_low_f64x2(a0, a1), self.unzip_low_f64x2(b0, b1))
}
#[inline(always)]
fn unzip_high_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
let (a0, a1) = self.split_f64x4(a);
let (b0, b1) = self.split_f64x4(b);
self.combine_f64x2(self.unzip_high_f64x2(a0, a1), self.unzip_high_f64x2(b0, b1))
}
#[inline(always)]
fn max_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
let (a0, a1) = self.split_f64x4(a);
let (b0, b1) = self.split_f64x4(b);
self.combine_f64x2(self.max_f64x2(a0, b0), self.max_f64x2(a1, b1))
}
#[inline(always)]
fn min_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
let (a0, a1) = self.split_f64x4(a);
let (b0, b1) = self.split_f64x4(b);
self.combine_f64x2(self.min_f64x2(a0, b0), self.min_f64x2(a1, b1))
}
#[inline(always)]
fn max_precise_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
let (a0, a1) = self.split_f64x4(a);
let (b0, b1) = self.split_f64x4(b);
self.combine_f64x2(
self.max_precise_f64x2(a0, b0),
self.max_precise_f64x2(a1, b1),
)
}
#[inline(always)]
fn min_precise_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x4<Self> {
let (a0, a1) = self.split_f64x4(a);
let (b0, b1) = self.split_f64x4(b);
self.combine_f64x2(
self.min_precise_f64x2(a0, b0),
self.min_precise_f64x2(a1, b1),
)
}
#[inline(always)]
fn mul_add_f64x4(self, a: f64x4<Self>, b: f64x4<Self>, c: f64x4<Self>) -> f64x4<Self> {
let (a0, a1) = self.split_f64x4(a);
let (b0, b1) = self.split_f64x4(b);
let (c0, c1) = self.split_f64x4(c);
self.combine_f64x2(
self.mul_add_f64x2(a0, b0, c0),
self.mul_add_f64x2(a1, b1, c1),
)
}
#[inline(always)]
fn mul_sub_f64x4(self, a: f64x4<Self>, b: f64x4<Self>, c: f64x4<Self>) -> f64x4<Self> {
let (a0, a1) = self.split_f64x4(a);
let (b0, b1) = self.split_f64x4(b);
let (c0, c1) = self.split_f64x4(c);
self.combine_f64x2(
self.mul_sub_f64x2(a0, b0, c0),
self.mul_sub_f64x2(a1, b1, c1),
)
}
#[inline(always)]
fn floor_f64x4(self, a: f64x4<Self>) -> f64x4<Self> {
let (a0, a1) = self.split_f64x4(a);
self.combine_f64x2(self.floor_f64x2(a0), self.floor_f64x2(a1))
}
#[inline(always)]
fn ceil_f64x4(self, a: f64x4<Self>) -> f64x4<Self> {
let (a0, a1) = self.split_f64x4(a);
self.combine_f64x2(self.ceil_f64x2(a0), self.ceil_f64x2(a1))
}
#[inline(always)]
fn round_ties_even_f64x4(self, a: f64x4<Self>) -> f64x4<Self> {
let (a0, a1) = self.split_f64x4(a);
self.combine_f64x2(
self.round_ties_even_f64x2(a0),
self.round_ties_even_f64x2(a1),
)
}
#[inline(always)]
fn fract_f64x4(self, a: f64x4<Self>) -> f64x4<Self> {
let (a0, a1) = self.split_f64x4(a);
self.combine_f64x2(self.fract_f64x2(a0), self.fract_f64x2(a1))
}
#[inline(always)]
fn trunc_f64x4(self, a: f64x4<Self>) -> f64x4<Self> {
let (a0, a1) = self.split_f64x4(a);
self.combine_f64x2(self.trunc_f64x2(a0), self.trunc_f64x2(a1))
}
#[inline(always)]
fn select_f64x4(self, a: mask64x4<Self>, b: f64x4<Self>, c: f64x4<Self>) -> f64x4<Self> {
let (a0, a1) = self.split_mask64x4(a);
let (b0, b1) = self.split_f64x4(b);
let (c0, c1) = self.split_f64x4(c);
self.combine_f64x2(self.select_f64x2(a0, b0, c0), self.select_f64x2(a1, b1, c1))
}
#[inline(always)]
fn combine_f64x4(self, a: f64x4<Self>, b: f64x4<Self>) -> f64x8<Self> {
f64x8 {
val: crate::support::Aligned512(float64x2x4_t(
a.val.0.0, a.val.0.1, b.val.0.0, b.val.0.1,
)),
simd: self,
}
}
#[inline(always)]
fn split_f64x4(self, a: f64x4<Self>) -> (f64x2<Self>, f64x2<Self>) {
(
f64x2 {
val: crate::support::Aligned128(a.val.0.0),
simd: self,
},
f64x2 {
val: crate::support::Aligned128(a.val.0.1),
simd: self,
},
)
}
#[inline(always)]
fn reinterpret_f32_f64x4(self, a: f64x4<Self>) -> f32x8<Self> {
let (a0, a1) = self.split_f64x4(a);
self.combine_f32x4(
self.reinterpret_f32_f64x2(a0),
self.reinterpret_f32_f64x2(a1),
)
}
#[inline(always)]
fn splat_mask64x4(self, val: i64) -> mask64x4<Self> {
let half = self.splat_mask64x2(val);
self.combine_mask64x2(half, half)
}
#[inline(always)]
fn load_array_mask64x4(self, val: [i64; 4usize]) -> mask64x4<Self> {
mask64x4 {
val: unsafe { core::mem::transmute_copy(&val) },
simd: self,
}
}
#[inline(always)]
fn load_array_ref_mask64x4(self, val: &[i64; 4usize]) -> mask64x4<Self> {
mask64x4 {
val: unsafe { core::mem::transmute_copy(val) },
simd: self,
}
}
#[inline(always)]
fn as_array_mask64x4(self, a: mask64x4<Self>) -> [i64; 4usize] {
unsafe { core::mem::transmute::<int64x2x2_t, [i64; 4usize]>(a.val.0) }
}
#[inline(always)]
fn as_array_ref_mask64x4(self, a: &mask64x4<Self>) -> &[i64; 4usize] {
unsafe { core::mem::transmute::<&int64x2x2_t, &[i64; 4usize]>(&a.val.0) }
}
#[inline(always)]
fn as_array_mut_mask64x4(self, a: &mut mask64x4<Self>) -> &mut [i64; 4usize] {
unsafe { core::mem::transmute::<&mut int64x2x2_t, &mut [i64; 4usize]>(&mut a.val.0) }
}
#[inline(always)]
fn store_array_mask64x4(self, a: mask64x4<Self>, dest: &mut [i64; 4usize]) -> () {
unsafe {
core::ptr::copy_nonoverlapping(
(&raw const a.val.0) as *const i64,
dest.as_mut_ptr(),
4usize,
);
}
}
#[inline(always)]
fn cvt_from_bytes_mask64x4(self, a: u8x32<Self>) -> mask64x4<Self> {
unsafe {
mask64x4 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_mask64x4(self, a: mask64x4<Self>) -> u8x32<Self> {
unsafe {
u8x32 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn slide_mask64x4<const SHIFT: usize>(
self,
a: mask64x4<Self>,
b: mask64x4<Self>,
) -> mask64x4<Self> {
if SHIFT >= 4usize {
return b;
}
let result = unsafe {
let a_bytes = self.cvt_to_bytes_mask64x4(a).val.0;
let b_bytes = self.cvt_to_bytes_mask64x4(b).val.0;
let a_blocks = [a_bytes.0, a_bytes.1];
let b_blocks = [b_bytes.0, b_bytes.1];
let shift_bytes = SHIFT * 8usize;
uint8x16x2_t(
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
0,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
1,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
)
};
self.cvt_from_bytes_mask64x4(u8x32 {
val: crate::support::Aligned256(result),
simd: self,
})
}
#[inline(always)]
fn slide_within_blocks_mask64x4<const SHIFT: usize>(
self,
a: mask64x4<Self>,
b: mask64x4<Self>,
) -> mask64x4<Self> {
let (a0, a1) = self.split_mask64x4(a);
let (b0, b1) = self.split_mask64x4(b);
self.combine_mask64x2(
self.slide_within_blocks_mask64x2::<SHIFT>(a0, b0),
self.slide_within_blocks_mask64x2::<SHIFT>(a1, b1),
)
}
#[inline(always)]
fn and_mask64x4(self, a: mask64x4<Self>, b: mask64x4<Self>) -> mask64x4<Self> {
let (a0, a1) = self.split_mask64x4(a);
let (b0, b1) = self.split_mask64x4(b);
self.combine_mask64x2(self.and_mask64x2(a0, b0), self.and_mask64x2(a1, b1))
}
#[inline(always)]
fn or_mask64x4(self, a: mask64x4<Self>, b: mask64x4<Self>) -> mask64x4<Self> {
let (a0, a1) = self.split_mask64x4(a);
let (b0, b1) = self.split_mask64x4(b);
self.combine_mask64x2(self.or_mask64x2(a0, b0), self.or_mask64x2(a1, b1))
}
#[inline(always)]
fn xor_mask64x4(self, a: mask64x4<Self>, b: mask64x4<Self>) -> mask64x4<Self> {
let (a0, a1) = self.split_mask64x4(a);
let (b0, b1) = self.split_mask64x4(b);
self.combine_mask64x2(self.xor_mask64x2(a0, b0), self.xor_mask64x2(a1, b1))
}
#[inline(always)]
fn not_mask64x4(self, a: mask64x4<Self>) -> mask64x4<Self> {
let (a0, a1) = self.split_mask64x4(a);
self.combine_mask64x2(self.not_mask64x2(a0), self.not_mask64x2(a1))
}
#[inline(always)]
fn select_mask64x4(
self,
a: mask64x4<Self>,
b: mask64x4<Self>,
c: mask64x4<Self>,
) -> mask64x4<Self> {
let (a0, a1) = self.split_mask64x4(a);
let (b0, b1) = self.split_mask64x4(b);
let (c0, c1) = self.split_mask64x4(c);
self.combine_mask64x2(
self.select_mask64x2(a0, b0, c0),
self.select_mask64x2(a1, b1, c1),
)
}
#[inline(always)]
fn simd_eq_mask64x4(self, a: mask64x4<Self>, b: mask64x4<Self>) -> mask64x4<Self> {
let (a0, a1) = self.split_mask64x4(a);
let (b0, b1) = self.split_mask64x4(b);
self.combine_mask64x2(self.simd_eq_mask64x2(a0, b0), self.simd_eq_mask64x2(a1, b1))
}
#[inline(always)]
fn any_true_mask64x4(self, a: mask64x4<Self>) -> bool {
let (a0, a1) = self.split_mask64x4(a);
self.any_true_mask64x2(a0) || self.any_true_mask64x2(a1)
}
#[inline(always)]
fn all_true_mask64x4(self, a: mask64x4<Self>) -> bool {
let (a0, a1) = self.split_mask64x4(a);
self.all_true_mask64x2(a0) && self.all_true_mask64x2(a1)
}
#[inline(always)]
fn any_false_mask64x4(self, a: mask64x4<Self>) -> bool {
let (a0, a1) = self.split_mask64x4(a);
self.any_false_mask64x2(a0) || self.any_false_mask64x2(a1)
}
#[inline(always)]
fn all_false_mask64x4(self, a: mask64x4<Self>) -> bool {
let (a0, a1) = self.split_mask64x4(a);
self.all_false_mask64x2(a0) && self.all_false_mask64x2(a1)
}
#[inline(always)]
fn combine_mask64x4(self, a: mask64x4<Self>, b: mask64x4<Self>) -> mask64x8<Self> {
mask64x8 {
val: crate::support::Aligned512(int64x2x4_t(
a.val.0.0, a.val.0.1, b.val.0.0, b.val.0.1,
)),
simd: self,
}
}
#[inline(always)]
fn split_mask64x4(self, a: mask64x4<Self>) -> (mask64x2<Self>, mask64x2<Self>) {
(
mask64x2 {
val: crate::support::Aligned128(a.val.0.0),
simd: self,
},
mask64x2 {
val: crate::support::Aligned128(a.val.0.1),
simd: self,
},
)
}
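// 512-bit vectors follow the same scheme one level up: they are stored as four
// 128-bit registers and operated on as two 256-bit halves.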
#[inline(always)]
fn splat_f32x16(self, val: f32) -> f32x16<Self> {
let half = self.splat_f32x8(val);
self.combine_f32x8(half, half)
}
#[inline(always)]
fn load_array_f32x16(self, val: [f32; 16usize]) -> f32x16<Self> {
f32x16 {
val: unsafe { core::mem::transmute_copy(&val) },
simd: self,
}
}
#[inline(always)]
fn load_array_ref_f32x16(self, val: &[f32; 16usize]) -> f32x16<Self> {
f32x16 {
val: unsafe { core::mem::transmute_copy(val) },
simd: self,
}
}
#[inline(always)]
fn as_array_f32x16(self, a: f32x16<Self>) -> [f32; 16usize] {
unsafe { core::mem::transmute::<float32x4x4_t, [f32; 16usize]>(a.val.0) }
}
#[inline(always)]
fn as_array_ref_f32x16(self, a: &f32x16<Self>) -> &[f32; 16usize] {
unsafe { core::mem::transmute::<&float32x4x4_t, &[f32; 16usize]>(&a.val.0) }
}
#[inline(always)]
fn as_array_mut_f32x16(self, a: &mut f32x16<Self>) -> &mut [f32; 16usize] {
unsafe { core::mem::transmute::<&mut float32x4x4_t, &mut [f32; 16usize]>(&mut a.val.0) }
}
#[inline(always)]
fn store_array_f32x16(self, a: f32x16<Self>, dest: &mut [f32; 16usize]) -> () {
unsafe {
core::ptr::copy_nonoverlapping(
(&raw const a.val.0) as *const f32,
dest.as_mut_ptr(),
16usize,
);
}
}
#[inline(always)]
fn cvt_from_bytes_f32x16(self, a: u8x64<Self>) -> f32x16<Self> {
unsafe {
f32x16 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_f32x16(self, a: f32x16<Self>) -> u8x64<Self> {
unsafe {
u8x64 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
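// 512-bit cross-block slide: same approach as the 256-bit version, but built
// from four 16-byte blocks.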
#[inline(always)]
fn slide_f32x16<const SHIFT: usize>(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
if SHIFT >= 16usize {
return b;
}
let result = unsafe {
let a_bytes = self.cvt_to_bytes_f32x16(a).val.0;
let b_bytes = self.cvt_to_bytes_f32x16(b).val.0;
let a_blocks = [a_bytes.0, a_bytes.1, a_bytes.2, a_bytes.3];
let b_blocks = [b_bytes.0, b_bytes.1, b_bytes.2, b_bytes.3];
let shift_bytes = SHIFT * 4usize;
uint8x16x4_t(
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
0,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
1,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
2,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
3,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
)
};
self.cvt_from_bytes_f32x16(u8x64 {
val: crate::support::Aligned512(result),
simd: self,
})
}
#[inline(always)]
fn slide_within_blocks_f32x16<const SHIFT: usize>(
self,
a: f32x16<Self>,
b: f32x16<Self>,
) -> f32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
let (b0, b1) = self.split_f32x16(b);
self.combine_f32x8(
self.slide_within_blocks_f32x8::<SHIFT>(a0, b0),
self.slide_within_blocks_f32x8::<SHIFT>(a1, b1),
)
}
#[inline(always)]
fn abs_f32x16(self, a: f32x16<Self>) -> f32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
self.combine_f32x8(self.abs_f32x8(a0), self.abs_f32x8(a1))
}
#[inline(always)]
fn neg_f32x16(self, a: f32x16<Self>) -> f32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
self.combine_f32x8(self.neg_f32x8(a0), self.neg_f32x8(a1))
}
#[inline(always)]
fn sqrt_f32x16(self, a: f32x16<Self>) -> f32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
self.combine_f32x8(self.sqrt_f32x8(a0), self.sqrt_f32x8(a1))
}
#[inline(always)]
fn add_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
let (b0, b1) = self.split_f32x16(b);
self.combine_f32x8(self.add_f32x8(a0, b0), self.add_f32x8(a1, b1))
}
#[inline(always)]
fn sub_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
let (b0, b1) = self.split_f32x16(b);
self.combine_f32x8(self.sub_f32x8(a0, b0), self.sub_f32x8(a1, b1))
}
#[inline(always)]
fn mul_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
let (b0, b1) = self.split_f32x16(b);
self.combine_f32x8(self.mul_f32x8(a0, b0), self.mul_f32x8(a1, b1))
}
#[inline(always)]
fn div_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
let (b0, b1) = self.split_f32x16(b);
self.combine_f32x8(self.div_f32x8(a0, b0), self.div_f32x8(a1, b1))
}
#[inline(always)]
fn copysign_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
let (b0, b1) = self.split_f32x16(b);
self.combine_f32x8(self.copysign_f32x8(a0, b0), self.copysign_f32x8(a1, b1))
}
#[inline(always)]
fn simd_eq_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> mask32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
let (b0, b1) = self.split_f32x16(b);
self.combine_mask32x8(self.simd_eq_f32x8(a0, b0), self.simd_eq_f32x8(a1, b1))
}
#[inline(always)]
fn simd_lt_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> mask32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
let (b0, b1) = self.split_f32x16(b);
self.combine_mask32x8(self.simd_lt_f32x8(a0, b0), self.simd_lt_f32x8(a1, b1))
}
#[inline(always)]
fn simd_le_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> mask32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
let (b0, b1) = self.split_f32x16(b);
self.combine_mask32x8(self.simd_le_f32x8(a0, b0), self.simd_le_f32x8(a1, b1))
}
#[inline(always)]
fn simd_ge_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> mask32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
let (b0, b1) = self.split_f32x16(b);
self.combine_mask32x8(self.simd_ge_f32x8(a0, b0), self.simd_ge_f32x8(a1, b1))
}
#[inline(always)]
fn simd_gt_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> mask32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
let (b0, b1) = self.split_f32x16(b);
self.combine_mask32x8(self.simd_gt_f32x8(a0, b0), self.simd_gt_f32x8(a1, b1))
}
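// zip_low interleaves the low halves of `a` and `b`; interleaving 8 lanes
// from each side already fills all 16 result lanes, so only `a0`/`b0` are
// needed here. zip_high does the same with the upper halves.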
#[inline(always)]
fn zip_low_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
let (a0, _) = self.split_f32x16(a);
let (b0, _) = self.split_f32x16(b);
self.combine_f32x8(self.zip_low_f32x8(a0, b0), self.zip_high_f32x8(a0, b0))
}
#[inline(always)]
fn zip_high_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
let (_, a1) = self.split_f32x16(a);
let (_, b1) = self.split_f32x16(b);
self.combine_f32x8(self.zip_low_f32x8(a1, b1), self.zip_high_f32x8(a1, b1))
}
#[inline(always)]
fn unzip_low_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
let (b0, b1) = self.split_f32x16(b);
self.combine_f32x8(self.unzip_low_f32x8(a0, a1), self.unzip_low_f32x8(b0, b1))
}
#[inline(always)]
fn unzip_high_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
let (b0, b1) = self.split_f32x16(b);
self.combine_f32x8(self.unzip_high_f32x8(a0, a1), self.unzip_high_f32x8(b0, b1))
}
#[inline(always)]
fn max_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
let (b0, b1) = self.split_f32x16(b);
self.combine_f32x8(self.max_f32x8(a0, b0), self.max_f32x8(a1, b1))
}
#[inline(always)]
fn min_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
let (b0, b1) = self.split_f32x16(b);
self.combine_f32x8(self.min_f32x8(a0, b0), self.min_f32x8(a1, b1))
}
#[inline(always)]
fn max_precise_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
let (b0, b1) = self.split_f32x16(b);
self.combine_f32x8(
self.max_precise_f32x8(a0, b0),
self.max_precise_f32x8(a1, b1),
)
}
#[inline(always)]
fn min_precise_f32x16(self, a: f32x16<Self>, b: f32x16<Self>) -> f32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
let (b0, b1) = self.split_f32x16(b);
self.combine_f32x8(
self.min_precise_f32x8(a0, b0),
self.min_precise_f32x8(a1, b1),
)
}
#[inline(always)]
fn mul_add_f32x16(self, a: f32x16<Self>, b: f32x16<Self>, c: f32x16<Self>) -> f32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
let (b0, b1) = self.split_f32x16(b);
let (c0, c1) = self.split_f32x16(c);
self.combine_f32x8(
self.mul_add_f32x8(a0, b0, c0),
self.mul_add_f32x8(a1, b1, c1),
)
}
#[inline(always)]
fn mul_sub_f32x16(self, a: f32x16<Self>, b: f32x16<Self>, c: f32x16<Self>) -> f32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
let (b0, b1) = self.split_f32x16(b);
let (c0, c1) = self.split_f32x16(c);
self.combine_f32x8(
self.mul_sub_f32x8(a0, b0, c0),
self.mul_sub_f32x8(a1, b1, c1),
)
}
#[inline(always)]
fn floor_f32x16(self, a: f32x16<Self>) -> f32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
self.combine_f32x8(self.floor_f32x8(a0), self.floor_f32x8(a1))
}
#[inline(always)]
fn ceil_f32x16(self, a: f32x16<Self>) -> f32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
self.combine_f32x8(self.ceil_f32x8(a0), self.ceil_f32x8(a1))
}
#[inline(always)]
fn round_ties_even_f32x16(self, a: f32x16<Self>) -> f32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
self.combine_f32x8(
self.round_ties_even_f32x8(a0),
self.round_ties_even_f32x8(a1),
)
}
#[inline(always)]
fn fract_f32x16(self, a: f32x16<Self>) -> f32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
self.combine_f32x8(self.fract_f32x8(a0), self.fract_f32x8(a1))
}
#[inline(always)]
fn trunc_f32x16(self, a: f32x16<Self>) -> f32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
self.combine_f32x8(self.trunc_f32x8(a0), self.trunc_f32x8(a1))
}
#[inline(always)]
fn select_f32x16(self, a: mask32x16<Self>, b: f32x16<Self>, c: f32x16<Self>) -> f32x16<Self> {
let (a0, a1) = self.split_mask32x16(a);
let (b0, b1) = self.split_f32x16(b);
let (c0, c1) = self.split_f32x16(c);
self.combine_f32x8(self.select_f32x8(a0, b0, c0), self.select_f32x8(a1, b1, c1))
}
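// Splits the 4 x 128-bit register tuple into two 2 x 128-bit halves. This is
// pure repackaging of registers; no lane movement is performed. The other
// split_* methods in this impl follow the same pattern.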
#[inline(always)]
fn split_f32x16(self, a: f32x16<Self>) -> (f32x8<Self>, f32x8<Self>) {
(
f32x8 {
val: crate::support::Aligned256(float32x4x2_t(a.val.0.0, a.val.0.1)),
simd: self,
},
f32x8 {
val: crate::support::Aligned256(float32x4x2_t(a.val.0.2, a.val.0.3)),
simd: self,
},
)
}
#[inline(always)]
fn reinterpret_f64_f32x16(self, a: f32x16<Self>) -> f64x8<Self> {
let (a0, a1) = self.split_f32x16(a);
self.combine_f64x4(
self.reinterpret_f64_f32x8(a0),
self.reinterpret_f64_f32x8(a1),
)
}
#[inline(always)]
fn reinterpret_i32_f32x16(self, a: f32x16<Self>) -> i32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
self.combine_i32x8(
self.reinterpret_i32_f32x8(a0),
self.reinterpret_i32_f32x8(a1),
)
}
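// vld4q_f32 is a de-interleaving structure load: element i of output register
// n is src[4*i + n], i.e. each register gathers every fourth f32. vst4q_f32
// performs the inverse interleaving store.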
#[inline(always)]
fn load_interleaved_128_f32x16(self, src: &[f32; 16usize]) -> f32x16<Self> {
unsafe { vld4q_f32(src.as_ptr()).simd_into(self) }
}
#[inline(always)]
fn store_interleaved_128_f32x16(self, a: f32x16<Self>, dest: &mut [f32; 16usize]) -> () {
unsafe { vst4q_f32(dest.as_mut_ptr(), a.into()) }
}
#[inline(always)]
fn reinterpret_u8_f32x16(self, a: f32x16<Self>) -> u8x64<Self> {
let (a0, a1) = self.split_f32x16(a);
self.combine_u8x32(self.reinterpret_u8_f32x8(a0), self.reinterpret_u8_f32x8(a1))
}
#[inline(always)]
fn reinterpret_u32_f32x16(self, a: f32x16<Self>) -> u32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
self.combine_u32x8(
self.reinterpret_u32_f32x8(a0),
self.reinterpret_u32_f32x8(a1),
)
}
#[inline(always)]
fn cvt_u32_f32x16(self, a: f32x16<Self>) -> u32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
self.combine_u32x8(self.cvt_u32_f32x8(a0), self.cvt_u32_f32x8(a1))
}
#[inline(always)]
fn cvt_u32_precise_f32x16(self, a: f32x16<Self>) -> u32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
self.combine_u32x8(
self.cvt_u32_precise_f32x8(a0),
self.cvt_u32_precise_f32x8(a1),
)
}
#[inline(always)]
fn cvt_i32_f32x16(self, a: f32x16<Self>) -> i32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
self.combine_i32x8(self.cvt_i32_f32x8(a0), self.cvt_i32_f32x8(a1))
}
#[inline(always)]
fn cvt_i32_precise_f32x16(self, a: f32x16<Self>) -> i32x16<Self> {
let (a0, a1) = self.split_f32x16(a);
self.combine_i32x8(
self.cvt_i32_precise_f32x8(a0),
self.cvt_i32_precise_f32x8(a1),
)
}
#[inline(always)]
fn splat_i8x64(self, val: i8) -> i8x64<Self> {
let half = self.splat_i8x32(val);
self.combine_i8x32(half, half)
}
#[inline(always)]
fn load_array_i8x64(self, val: [i8; 64usize]) -> i8x64<Self> {
i8x64 {
val: unsafe { core::mem::transmute_copy(&val) },
simd: self,
}
}
#[inline(always)]
fn load_array_ref_i8x64(self, val: &[i8; 64usize]) -> i8x64<Self> {
i8x64 {
val: unsafe { core::mem::transmute_copy(val) },
simd: self,
}
}
#[inline(always)]
fn as_array_i8x64(self, a: i8x64<Self>) -> [i8; 64usize] {
unsafe { core::mem::transmute::<int8x16x4_t, [i8; 64usize]>(a.val.0) }
}
#[inline(always)]
fn as_array_ref_i8x64(self, a: &i8x64<Self>) -> &[i8; 64usize] {
unsafe { core::mem::transmute::<&int8x16x4_t, &[i8; 64usize]>(&a.val.0) }
}
#[inline(always)]
fn as_array_mut_i8x64(self, a: &mut i8x64<Self>) -> &mut [i8; 64usize] {
unsafe { core::mem::transmute::<&mut int8x16x4_t, &mut [i8; 64usize]>(&mut a.val.0) }
}
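// Stores by copying the raw bytes of the register tuple into the destination
// array; `[i8; 64]` has the same size and layout as `int8x16x4_t`.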
#[inline(always)]
fn store_array_i8x64(self, a: i8x64<Self>, dest: &mut [i8; 64usize]) -> () {
unsafe {
core::ptr::copy_nonoverlapping(
(&raw const a.val.0) as *const i8,
dest.as_mut_ptr(),
64usize,
);
}
}
#[inline(always)]
fn cvt_from_bytes_i8x64(self, a: u8x64<Self>) -> i8x64<Self> {
unsafe {
i8x64 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_i8x64(self, a: i8x64<Self>) -> u8x64<Self> {
unsafe {
u8x64 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
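// Same cross-block slide scheme as slide_f32x16 above, except SHIFT already
// counts bytes for 8-bit lanes, so no scaling is needed.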
#[inline(always)]
fn slide_i8x64<const SHIFT: usize>(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
if SHIFT >= 64usize {
return b;
}
let result = unsafe {
let a_bytes = self.cvt_to_bytes_i8x64(a).val.0;
let b_bytes = self.cvt_to_bytes_i8x64(b).val.0;
let a_blocks = [a_bytes.0, a_bytes.1, a_bytes.2, a_bytes.3];
let b_blocks = [b_bytes.0, b_bytes.1, b_bytes.2, b_bytes.3];
let shift_bytes = SHIFT;
uint8x16x4_t(
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
0,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
1,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
2,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
3,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
)
};
self.cvt_from_bytes_i8x64(u8x64 {
val: crate::support::Aligned512(result),
simd: self,
})
}
#[inline(always)]
fn slide_within_blocks_i8x64<const SHIFT: usize>(
self,
a: i8x64<Self>,
b: i8x64<Self>,
) -> i8x64<Self> {
let (a0, a1) = self.split_i8x64(a);
let (b0, b1) = self.split_i8x64(b);
self.combine_i8x32(
self.slide_within_blocks_i8x32::<SHIFT>(a0, b0),
self.slide_within_blocks_i8x32::<SHIFT>(a1, b1),
)
}
#[inline(always)]
fn add_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
let (a0, a1) = self.split_i8x64(a);
let (b0, b1) = self.split_i8x64(b);
self.combine_i8x32(self.add_i8x32(a0, b0), self.add_i8x32(a1, b1))
}
#[inline(always)]
fn sub_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
let (a0, a1) = self.split_i8x64(a);
let (b0, b1) = self.split_i8x64(b);
self.combine_i8x32(self.sub_i8x32(a0, b0), self.sub_i8x32(a1, b1))
}
#[inline(always)]
fn mul_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
let (a0, a1) = self.split_i8x64(a);
let (b0, b1) = self.split_i8x64(b);
self.combine_i8x32(self.mul_i8x32(a0, b0), self.mul_i8x32(a1, b1))
}
#[inline(always)]
fn and_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
let (a0, a1) = self.split_i8x64(a);
let (b0, b1) = self.split_i8x64(b);
self.combine_i8x32(self.and_i8x32(a0, b0), self.and_i8x32(a1, b1))
}
#[inline(always)]
fn or_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
let (a0, a1) = self.split_i8x64(a);
let (b0, b1) = self.split_i8x64(b);
self.combine_i8x32(self.or_i8x32(a0, b0), self.or_i8x32(a1, b1))
}
#[inline(always)]
fn xor_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
let (a0, a1) = self.split_i8x64(a);
let (b0, b1) = self.split_i8x64(b);
self.combine_i8x32(self.xor_i8x32(a0, b0), self.xor_i8x32(a1, b1))
}
#[inline(always)]
fn not_i8x64(self, a: i8x64<Self>) -> i8x64<Self> {
let (a0, a1) = self.split_i8x64(a);
self.combine_i8x32(self.not_i8x32(a0), self.not_i8x32(a1))
}
#[inline(always)]
fn shl_i8x64(self, a: i8x64<Self>, shift: u32) -> i8x64<Self> {
let (a0, a1) = self.split_i8x64(a);
self.combine_i8x32(self.shl_i8x32(a0, shift), self.shl_i8x32(a1, shift))
}
#[inline(always)]
fn shlv_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
let (a0, a1) = self.split_i8x64(a);
let (b0, b1) = self.split_i8x64(b);
self.combine_i8x32(self.shlv_i8x32(a0, b0), self.shlv_i8x32(a1, b1))
}
#[inline(always)]
fn shr_i8x64(self, a: i8x64<Self>, shift: u32) -> i8x64<Self> {
let (a0, a1) = self.split_i8x64(a);
self.combine_i8x32(self.shr_i8x32(a0, shift), self.shr_i8x32(a1, shift))
}
#[inline(always)]
fn shrv_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
let (a0, a1) = self.split_i8x64(a);
let (b0, b1) = self.split_i8x64(b);
self.combine_i8x32(self.shrv_i8x32(a0, b0), self.shrv_i8x32(a1, b1))
}
#[inline(always)]
fn simd_eq_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> mask8x64<Self> {
let (a0, a1) = self.split_i8x64(a);
let (b0, b1) = self.split_i8x64(b);
self.combine_mask8x32(self.simd_eq_i8x32(a0, b0), self.simd_eq_i8x32(a1, b1))
}
#[inline(always)]
fn simd_lt_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> mask8x64<Self> {
let (a0, a1) = self.split_i8x64(a);
let (b0, b1) = self.split_i8x64(b);
self.combine_mask8x32(self.simd_lt_i8x32(a0, b0), self.simd_lt_i8x32(a1, b1))
}
#[inline(always)]
fn simd_le_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> mask8x64<Self> {
let (a0, a1) = self.split_i8x64(a);
let (b0, b1) = self.split_i8x64(b);
self.combine_mask8x32(self.simd_le_i8x32(a0, b0), self.simd_le_i8x32(a1, b1))
}
#[inline(always)]
fn simd_ge_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> mask8x64<Self> {
let (a0, a1) = self.split_i8x64(a);
let (b0, b1) = self.split_i8x64(b);
self.combine_mask8x32(self.simd_ge_i8x32(a0, b0), self.simd_ge_i8x32(a1, b1))
}
#[inline(always)]
fn simd_gt_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> mask8x64<Self> {
let (a0, a1) = self.split_i8x64(a);
let (b0, b1) = self.split_i8x64(b);
self.combine_mask8x32(self.simd_gt_i8x32(a0, b0), self.simd_gt_i8x32(a1, b1))
}
#[inline(always)]
fn zip_low_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
let (a0, _) = self.split_i8x64(a);
let (b0, _) = self.split_i8x64(b);
self.combine_i8x32(self.zip_low_i8x32(a0, b0), self.zip_high_i8x32(a0, b0))
}
#[inline(always)]
fn zip_high_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
let (_, a1) = self.split_i8x64(a);
let (_, b1) = self.split_i8x64(b);
self.combine_i8x32(self.zip_low_i8x32(a1, b1), self.zip_high_i8x32(a1, b1))
}
#[inline(always)]
fn unzip_low_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
let (a0, a1) = self.split_i8x64(a);
let (b0, b1) = self.split_i8x64(b);
self.combine_i8x32(self.unzip_low_i8x32(a0, a1), self.unzip_low_i8x32(b0, b1))
}
#[inline(always)]
fn unzip_high_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
let (a0, a1) = self.split_i8x64(a);
let (b0, b1) = self.split_i8x64(b);
self.combine_i8x32(self.unzip_high_i8x32(a0, a1), self.unzip_high_i8x32(b0, b1))
}
#[inline(always)]
fn select_i8x64(self, a: mask8x64<Self>, b: i8x64<Self>, c: i8x64<Self>) -> i8x64<Self> {
let (a0, a1) = self.split_mask8x64(a);
let (b0, b1) = self.split_i8x64(b);
let (c0, c1) = self.split_i8x64(c);
self.combine_i8x32(self.select_i8x32(a0, b0, c0), self.select_i8x32(a1, b1, c1))
}
#[inline(always)]
fn min_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
let (a0, a1) = self.split_i8x64(a);
let (b0, b1) = self.split_i8x64(b);
self.combine_i8x32(self.min_i8x32(a0, b0), self.min_i8x32(a1, b1))
}
#[inline(always)]
fn max_i8x64(self, a: i8x64<Self>, b: i8x64<Self>) -> i8x64<Self> {
let (a0, a1) = self.split_i8x64(a);
let (b0, b1) = self.split_i8x64(b);
self.combine_i8x32(self.max_i8x32(a0, b0), self.max_i8x32(a1, b1))
}
#[inline(always)]
fn split_i8x64(self, a: i8x64<Self>) -> (i8x32<Self>, i8x32<Self>) {
(
i8x32 {
val: crate::support::Aligned256(int8x16x2_t(a.val.0.0, a.val.0.1)),
simd: self,
},
i8x32 {
val: crate::support::Aligned256(int8x16x2_t(a.val.0.2, a.val.0.3)),
simd: self,
},
)
}
#[inline(always)]
fn neg_i8x64(self, a: i8x64<Self>) -> i8x64<Self> {
let (a0, a1) = self.split_i8x64(a);
self.combine_i8x32(self.neg_i8x32(a0), self.neg_i8x32(a1))
}
#[inline(always)]
fn reinterpret_u8_i8x64(self, a: i8x64<Self>) -> u8x64<Self> {
let (a0, a1) = self.split_i8x64(a);
self.combine_u8x32(self.reinterpret_u8_i8x32(a0), self.reinterpret_u8_i8x32(a1))
}
#[inline(always)]
fn reinterpret_u32_i8x64(self, a: i8x64<Self>) -> u32x16<Self> {
let (a0, a1) = self.split_i8x64(a);
self.combine_u32x8(
self.reinterpret_u32_i8x32(a0),
self.reinterpret_u32_i8x32(a1),
)
}
#[inline(always)]
fn splat_u8x64(self, val: u8) -> u8x64<Self> {
let half = self.splat_u8x32(val);
self.combine_u8x32(half, half)
}
#[inline(always)]
fn load_array_u8x64(self, val: [u8; 64usize]) -> u8x64<Self> {
u8x64 {
val: unsafe { core::mem::transmute_copy(&val) },
simd: self,
}
}
#[inline(always)]
fn load_array_ref_u8x64(self, val: &[u8; 64usize]) -> u8x64<Self> {
u8x64 {
val: unsafe { core::mem::transmute_copy(val) },
simd: self,
}
}
#[inline(always)]
fn as_array_u8x64(self, a: u8x64<Self>) -> [u8; 64usize] {
unsafe { core::mem::transmute::<uint8x16x4_t, [u8; 64usize]>(a.val.0) }
}
#[inline(always)]
fn as_array_ref_u8x64(self, a: &u8x64<Self>) -> &[u8; 64usize] {
unsafe { core::mem::transmute::<&uint8x16x4_t, &[u8; 64usize]>(&a.val.0) }
}
#[inline(always)]
fn as_array_mut_u8x64(self, a: &mut u8x64<Self>) -> &mut [u8; 64usize] {
unsafe { core::mem::transmute::<&mut uint8x16x4_t, &mut [u8; 64usize]>(&mut a.val.0) }
}
#[inline(always)]
fn store_array_u8x64(self, a: u8x64<Self>, dest: &mut [u8; 64usize]) -> () {
unsafe {
core::ptr::copy_nonoverlapping(
(&raw const a.val.0) as *const u8,
dest.as_mut_ptr(),
64usize,
);
}
}
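// For u8x64 the byte form and the value coincide, so the two conversions
// below are identity reinterpretations (transmute between identical types).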
#[inline(always)]
fn cvt_from_bytes_u8x64(self, a: u8x64<Self>) -> u8x64<Self> {
unsafe {
u8x64 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_u8x64(self, a: u8x64<Self>) -> u8x64<Self> {
unsafe {
u8x64 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn slide_u8x64<const SHIFT: usize>(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
if SHIFT >= 64usize {
return b;
}
let result = unsafe {
let a_bytes = self.cvt_to_bytes_u8x64(a).val.0;
let b_bytes = self.cvt_to_bytes_u8x64(b).val.0;
let a_blocks = [a_bytes.0, a_bytes.1, a_bytes.2, a_bytes.3];
let b_blocks = [b_bytes.0, b_bytes.1, b_bytes.2, b_bytes.3];
let shift_bytes = SHIFT;
uint8x16x4_t(
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
0,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
1,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
2,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
3,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
)
};
self.cvt_from_bytes_u8x64(u8x64 {
val: crate::support::Aligned512(result),
simd: self,
})
}
#[inline(always)]
fn slide_within_blocks_u8x64<const SHIFT: usize>(
self,
a: u8x64<Self>,
b: u8x64<Self>,
) -> u8x64<Self> {
let (a0, a1) = self.split_u8x64(a);
let (b0, b1) = self.split_u8x64(b);
self.combine_u8x32(
self.slide_within_blocks_u8x32::<SHIFT>(a0, b0),
self.slide_within_blocks_u8x32::<SHIFT>(a1, b1),
)
}
#[inline(always)]
fn add_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
let (a0, a1) = self.split_u8x64(a);
let (b0, b1) = self.split_u8x64(b);
self.combine_u8x32(self.add_u8x32(a0, b0), self.add_u8x32(a1, b1))
}
#[inline(always)]
fn sub_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
let (a0, a1) = self.split_u8x64(a);
let (b0, b1) = self.split_u8x64(b);
self.combine_u8x32(self.sub_u8x32(a0, b0), self.sub_u8x32(a1, b1))
}
#[inline(always)]
fn mul_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
let (a0, a1) = self.split_u8x64(a);
let (b0, b1) = self.split_u8x64(b);
self.combine_u8x32(self.mul_u8x32(a0, b0), self.mul_u8x32(a1, b1))
}
#[inline(always)]
fn and_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
let (a0, a1) = self.split_u8x64(a);
let (b0, b1) = self.split_u8x64(b);
self.combine_u8x32(self.and_u8x32(a0, b0), self.and_u8x32(a1, b1))
}
#[inline(always)]
fn or_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
let (a0, a1) = self.split_u8x64(a);
let (b0, b1) = self.split_u8x64(b);
self.combine_u8x32(self.or_u8x32(a0, b0), self.or_u8x32(a1, b1))
}
#[inline(always)]
fn xor_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
let (a0, a1) = self.split_u8x64(a);
let (b0, b1) = self.split_u8x64(b);
self.combine_u8x32(self.xor_u8x32(a0, b0), self.xor_u8x32(a1, b1))
}
#[inline(always)]
fn not_u8x64(self, a: u8x64<Self>) -> u8x64<Self> {
let (a0, a1) = self.split_u8x64(a);
self.combine_u8x32(self.not_u8x32(a0), self.not_u8x32(a1))
}
#[inline(always)]
fn shl_u8x64(self, a: u8x64<Self>, shift: u32) -> u8x64<Self> {
let (a0, a1) = self.split_u8x64(a);
self.combine_u8x32(self.shl_u8x32(a0, shift), self.shl_u8x32(a1, shift))
}
#[inline(always)]
fn shlv_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
let (a0, a1) = self.split_u8x64(a);
let (b0, b1) = self.split_u8x64(b);
self.combine_u8x32(self.shlv_u8x32(a0, b0), self.shlv_u8x32(a1, b1))
}
#[inline(always)]
fn shr_u8x64(self, a: u8x64<Self>, shift: u32) -> u8x64<Self> {
let (a0, a1) = self.split_u8x64(a);
self.combine_u8x32(self.shr_u8x32(a0, shift), self.shr_u8x32(a1, shift))
}
#[inline(always)]
fn shrv_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
let (a0, a1) = self.split_u8x64(a);
let (b0, b1) = self.split_u8x64(b);
self.combine_u8x32(self.shrv_u8x32(a0, b0), self.shrv_u8x32(a1, b1))
}
#[inline(always)]
fn simd_eq_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> mask8x64<Self> {
let (a0, a1) = self.split_u8x64(a);
let (b0, b1) = self.split_u8x64(b);
self.combine_mask8x32(self.simd_eq_u8x32(a0, b0), self.simd_eq_u8x32(a1, b1))
}
#[inline(always)]
fn simd_lt_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> mask8x64<Self> {
let (a0, a1) = self.split_u8x64(a);
let (b0, b1) = self.split_u8x64(b);
self.combine_mask8x32(self.simd_lt_u8x32(a0, b0), self.simd_lt_u8x32(a1, b1))
}
#[inline(always)]
fn simd_le_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> mask8x64<Self> {
let (a0, a1) = self.split_u8x64(a);
let (b0, b1) = self.split_u8x64(b);
self.combine_mask8x32(self.simd_le_u8x32(a0, b0), self.simd_le_u8x32(a1, b1))
}
#[inline(always)]
fn simd_ge_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> mask8x64<Self> {
let (a0, a1) = self.split_u8x64(a);
let (b0, b1) = self.split_u8x64(b);
self.combine_mask8x32(self.simd_ge_u8x32(a0, b0), self.simd_ge_u8x32(a1, b1))
}
#[inline(always)]
fn simd_gt_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> mask8x64<Self> {
let (a0, a1) = self.split_u8x64(a);
let (b0, b1) = self.split_u8x64(b);
self.combine_mask8x32(self.simd_gt_u8x32(a0, b0), self.simd_gt_u8x32(a1, b1))
}
#[inline(always)]
fn zip_low_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
let (a0, _) = self.split_u8x64(a);
let (b0, _) = self.split_u8x64(b);
self.combine_u8x32(self.zip_low_u8x32(a0, b0), self.zip_high_u8x32(a0, b0))
}
#[inline(always)]
fn zip_high_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
let (_, a1) = self.split_u8x64(a);
let (_, b1) = self.split_u8x64(b);
self.combine_u8x32(self.zip_low_u8x32(a1, b1), self.zip_high_u8x32(a1, b1))
}
#[inline(always)]
fn unzip_low_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
let (a0, a1) = self.split_u8x64(a);
let (b0, b1) = self.split_u8x64(b);
self.combine_u8x32(self.unzip_low_u8x32(a0, a1), self.unzip_low_u8x32(b0, b1))
}
#[inline(always)]
fn unzip_high_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
let (a0, a1) = self.split_u8x64(a);
let (b0, b1) = self.split_u8x64(b);
self.combine_u8x32(self.unzip_high_u8x32(a0, a1), self.unzip_high_u8x32(b0, b1))
}
#[inline(always)]
fn select_u8x64(self, a: mask8x64<Self>, b: u8x64<Self>, c: u8x64<Self>) -> u8x64<Self> {
let (a0, a1) = self.split_mask8x64(a);
let (b0, b1) = self.split_u8x64(b);
let (c0, c1) = self.split_u8x64(c);
self.combine_u8x32(self.select_u8x32(a0, b0, c0), self.select_u8x32(a1, b1, c1))
}
#[inline(always)]
fn min_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
let (a0, a1) = self.split_u8x64(a);
let (b0, b1) = self.split_u8x64(b);
self.combine_u8x32(self.min_u8x32(a0, b0), self.min_u8x32(a1, b1))
}
#[inline(always)]
fn max_u8x64(self, a: u8x64<Self>, b: u8x64<Self>) -> u8x64<Self> {
let (a0, a1) = self.split_u8x64(a);
let (b0, b1) = self.split_u8x64(b);
self.combine_u8x32(self.max_u8x32(a0, b0), self.max_u8x32(a1, b1))
}
#[inline(always)]
fn split_u8x64(self, a: u8x64<Self>) -> (u8x32<Self>, u8x32<Self>) {
(
u8x32 {
val: crate::support::Aligned256(uint8x16x2_t(a.val.0.0, a.val.0.1)),
simd: self,
},
u8x32 {
val: crate::support::Aligned256(uint8x16x2_t(a.val.0.2, a.val.0.3)),
simd: self,
},
)
}
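// vld4q_u8 / vst4q_u8: de-interleaving load and interleaving store over
// groups of four bytes, analogous to the f32 variants above.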
#[inline(always)]
fn load_interleaved_128_u8x64(self, src: &[u8; 64usize]) -> u8x64<Self> {
unsafe { vld4q_u8(src.as_ptr()).simd_into(self) }
}
#[inline(always)]
fn store_interleaved_128_u8x64(self, a: u8x64<Self>, dest: &mut [u8; 64usize]) -> () {
unsafe { vst4q_u8(dest.as_mut_ptr(), a.into()) }
}
#[inline(always)]
fn reinterpret_u32_u8x64(self, a: u8x64<Self>) -> u32x16<Self> {
let (a0, a1) = self.split_u8x64(a);
self.combine_u32x8(
self.reinterpret_u32_u8x32(a0),
self.reinterpret_u32_u8x32(a1),
)
}
#[inline(always)]
fn splat_mask8x64(self, val: i8) -> mask8x64<Self> {
let half = self.splat_mask8x32(val);
self.combine_mask8x32(half, half)
}
#[inline(always)]
fn load_array_mask8x64(self, val: [i8; 64usize]) -> mask8x64<Self> {
mask8x64 {
val: unsafe { core::mem::transmute_copy(&val) },
simd: self,
}
}
#[inline(always)]
fn load_array_ref_mask8x64(self, val: &[i8; 64usize]) -> mask8x64<Self> {
mask8x64 {
val: unsafe { core::mem::transmute_copy(val) },
simd: self,
}
}
#[inline(always)]
fn as_array_mask8x64(self, a: mask8x64<Self>) -> [i8; 64usize] {
unsafe { core::mem::transmute::<int8x16x4_t, [i8; 64usize]>(a.val.0) }
}
#[inline(always)]
fn as_array_ref_mask8x64(self, a: &mask8x64<Self>) -> &[i8; 64usize] {
unsafe { core::mem::transmute::<&int8x16x4_t, &[i8; 64usize]>(&a.val.0) }
}
#[inline(always)]
fn as_array_mut_mask8x64(self, a: &mut mask8x64<Self>) -> &mut [i8; 64usize] {
unsafe { core::mem::transmute::<&mut int8x16x4_t, &mut [i8; 64usize]>(&mut a.val.0) }
}
#[inline(always)]
fn store_array_mask8x64(self, a: mask8x64<Self>, dest: &mut [i8; 64usize]) -> () {
unsafe {
core::ptr::copy_nonoverlapping(
(&raw const a.val.0) as *const i8,
dest.as_mut_ptr(),
64usize,
);
}
}
#[inline(always)]
fn cvt_from_bytes_mask8x64(self, a: u8x64<Self>) -> mask8x64<Self> {
unsafe {
mask8x64 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_mask8x64(self, a: mask8x64<Self>) -> u8x64<Self> {
unsafe {
u8x64 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn slide_mask8x64<const SHIFT: usize>(
self,
a: mask8x64<Self>,
b: mask8x64<Self>,
) -> mask8x64<Self> {
if SHIFT >= 64usize {
return b;
}
let result = unsafe {
let a_bytes = self.cvt_to_bytes_mask8x64(a).val.0;
let b_bytes = self.cvt_to_bytes_mask8x64(b).val.0;
let a_blocks = [a_bytes.0, a_bytes.1, a_bytes.2, a_bytes.3];
let b_blocks = [b_bytes.0, b_bytes.1, b_bytes.2, b_bytes.3];
let shift_bytes = SHIFT;
uint8x16x4_t(
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
0,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
1,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
2,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
3,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
)
};
self.cvt_from_bytes_mask8x64(u8x64 {
val: crate::support::Aligned512(result),
simd: self,
})
}
#[inline(always)]
fn slide_within_blocks_mask8x64<const SHIFT: usize>(
self,
a: mask8x64<Self>,
b: mask8x64<Self>,
) -> mask8x64<Self> {
let (a0, a1) = self.split_mask8x64(a);
let (b0, b1) = self.split_mask8x64(b);
self.combine_mask8x32(
self.slide_within_blocks_mask8x32::<SHIFT>(a0, b0),
self.slide_within_blocks_mask8x32::<SHIFT>(a1, b1),
)
}
#[inline(always)]
fn and_mask8x64(self, a: mask8x64<Self>, b: mask8x64<Self>) -> mask8x64<Self> {
let (a0, a1) = self.split_mask8x64(a);
let (b0, b1) = self.split_mask8x64(b);
self.combine_mask8x32(self.and_mask8x32(a0, b0), self.and_mask8x32(a1, b1))
}
#[inline(always)]
fn or_mask8x64(self, a: mask8x64<Self>, b: mask8x64<Self>) -> mask8x64<Self> {
let (a0, a1) = self.split_mask8x64(a);
let (b0, b1) = self.split_mask8x64(b);
self.combine_mask8x32(self.or_mask8x32(a0, b0), self.or_mask8x32(a1, b1))
}
#[inline(always)]
fn xor_mask8x64(self, a: mask8x64<Self>, b: mask8x64<Self>) -> mask8x64<Self> {
let (a0, a1) = self.split_mask8x64(a);
let (b0, b1) = self.split_mask8x64(b);
self.combine_mask8x32(self.xor_mask8x32(a0, b0), self.xor_mask8x32(a1, b1))
}
#[inline(always)]
fn not_mask8x64(self, a: mask8x64<Self>) -> mask8x64<Self> {
let (a0, a1) = self.split_mask8x64(a);
self.combine_mask8x32(self.not_mask8x32(a0), self.not_mask8x32(a1))
}
#[inline(always)]
fn select_mask8x64(
self,
a: mask8x64<Self>,
b: mask8x64<Self>,
c: mask8x64<Self>,
) -> mask8x64<Self> {
let (a0, a1) = self.split_mask8x64(a);
let (b0, b1) = self.split_mask8x64(b);
let (c0, c1) = self.split_mask8x64(c);
self.combine_mask8x32(
self.select_mask8x32(a0, b0, c0),
self.select_mask8x32(a1, b1, c1),
)
}
#[inline(always)]
fn simd_eq_mask8x64(self, a: mask8x64<Self>, b: mask8x64<Self>) -> mask8x64<Self> {
let (a0, a1) = self.split_mask8x64(a);
let (b0, b1) = self.split_mask8x64(b);
self.combine_mask8x32(self.simd_eq_mask8x32(a0, b0), self.simd_eq_mask8x32(a1, b1))
}
#[inline(always)]
fn any_true_mask8x64(self, a: mask8x64<Self>) -> bool {
let (a0, a1) = self.split_mask8x64(a);
self.any_true_mask8x32(a0) || self.any_true_mask8x32(a1)
}
#[inline(always)]
fn all_true_mask8x64(self, a: mask8x64<Self>) -> bool {
let (a0, a1) = self.split_mask8x64(a);
self.all_true_mask8x32(a0) && self.all_true_mask8x32(a1)
}
#[inline(always)]
fn any_false_mask8x64(self, a: mask8x64<Self>) -> bool {
let (a0, a1) = self.split_mask8x64(a);
self.any_false_mask8x32(a0) || self.any_false_mask8x32(a1)
}
#[inline(always)]
fn all_false_mask8x64(self, a: mask8x64<Self>) -> bool {
let (a0, a1) = self.split_mask8x64(a);
self.all_false_mask8x32(a0) && self.all_false_mask8x32(a1)
}
#[inline(always)]
fn split_mask8x64(self, a: mask8x64<Self>) -> (mask8x32<Self>, mask8x32<Self>) {
(
mask8x32 {
val: crate::support::Aligned256(int8x16x2_t(a.val.0.0, a.val.0.1)),
simd: self,
},
mask8x32 {
val: crate::support::Aligned256(int8x16x2_t(a.val.0.2, a.val.0.3)),
simd: self,
},
)
}
#[inline(always)]
fn splat_i16x32(self, val: i16) -> i16x32<Self> {
let half = self.splat_i16x16(val);
self.combine_i16x16(half, half)
}
#[inline(always)]
fn load_array_i16x32(self, val: [i16; 32usize]) -> i16x32<Self> {
i16x32 {
val: unsafe { core::mem::transmute_copy(&val) },
simd: self,
}
}
#[inline(always)]
fn load_array_ref_i16x32(self, val: &[i16; 32usize]) -> i16x32<Self> {
i16x32 {
val: unsafe { core::mem::transmute_copy(val) },
simd: self,
}
}
#[inline(always)]
fn as_array_i16x32(self, a: i16x32<Self>) -> [i16; 32usize] {
unsafe { core::mem::transmute::<int16x8x4_t, [i16; 32usize]>(a.val.0) }
}
#[inline(always)]
fn as_array_ref_i16x32(self, a: &i16x32<Self>) -> &[i16; 32usize] {
unsafe { core::mem::transmute::<&int16x8x4_t, &[i16; 32usize]>(&a.val.0) }
}
#[inline(always)]
fn as_array_mut_i16x32(self, a: &mut i16x32<Self>) -> &mut [i16; 32usize] {
unsafe { core::mem::transmute::<&mut int16x8x4_t, &mut [i16; 32usize]>(&mut a.val.0) }
}
#[inline(always)]
fn store_array_i16x32(self, a: i16x32<Self>, dest: &mut [i16; 32usize]) -> () {
unsafe {
core::ptr::copy_nonoverlapping(
(&raw const a.val.0) as *const i16,
dest.as_mut_ptr(),
32usize,
);
}
}
#[inline(always)]
fn cvt_from_bytes_i16x32(self, a: u8x64<Self>) -> i16x32<Self> {
unsafe {
i16x32 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_i16x32(self, a: i16x32<Self>) -> u8x64<Self> {
unsafe {
u8x64 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn slide_i16x32<const SHIFT: usize>(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
if SHIFT >= 32usize {
return b;
}
let result = unsafe {
let a_bytes = self.cvt_to_bytes_i16x32(a).val.0;
let b_bytes = self.cvt_to_bytes_i16x32(b).val.0;
let a_blocks = [a_bytes.0, a_bytes.1, a_bytes.2, a_bytes.3];
let b_blocks = [b_bytes.0, b_bytes.1, b_bytes.2, b_bytes.3];
let shift_bytes = SHIFT * 2usize;
uint8x16x4_t(
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
0,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
1,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
2,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
3,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
)
};
self.cvt_from_bytes_i16x32(u8x64 {
val: crate::support::Aligned512(result),
simd: self,
})
}
#[inline(always)]
fn slide_within_blocks_i16x32<const SHIFT: usize>(
self,
a: i16x32<Self>,
b: i16x32<Self>,
) -> i16x32<Self> {
let (a0, a1) = self.split_i16x32(a);
let (b0, b1) = self.split_i16x32(b);
self.combine_i16x16(
self.slide_within_blocks_i16x16::<SHIFT>(a0, b0),
self.slide_within_blocks_i16x16::<SHIFT>(a1, b1),
)
}
#[inline(always)]
fn add_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
let (a0, a1) = self.split_i16x32(a);
let (b0, b1) = self.split_i16x32(b);
self.combine_i16x16(self.add_i16x16(a0, b0), self.add_i16x16(a1, b1))
}
#[inline(always)]
fn sub_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
let (a0, a1) = self.split_i16x32(a);
let (b0, b1) = self.split_i16x32(b);
self.combine_i16x16(self.sub_i16x16(a0, b0), self.sub_i16x16(a1, b1))
}
#[inline(always)]
fn mul_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
let (a0, a1) = self.split_i16x32(a);
let (b0, b1) = self.split_i16x32(b);
self.combine_i16x16(self.mul_i16x16(a0, b0), self.mul_i16x16(a1, b1))
}
#[inline(always)]
fn and_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
let (a0, a1) = self.split_i16x32(a);
let (b0, b1) = self.split_i16x32(b);
self.combine_i16x16(self.and_i16x16(a0, b0), self.and_i16x16(a1, b1))
}
#[inline(always)]
fn or_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
let (a0, a1) = self.split_i16x32(a);
let (b0, b1) = self.split_i16x32(b);
self.combine_i16x16(self.or_i16x16(a0, b0), self.or_i16x16(a1, b1))
}
#[inline(always)]
fn xor_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
let (a0, a1) = self.split_i16x32(a);
let (b0, b1) = self.split_i16x32(b);
self.combine_i16x16(self.xor_i16x16(a0, b0), self.xor_i16x16(a1, b1))
}
#[inline(always)]
fn not_i16x32(self, a: i16x32<Self>) -> i16x32<Self> {
let (a0, a1) = self.split_i16x32(a);
self.combine_i16x16(self.not_i16x16(a0), self.not_i16x16(a1))
}
#[inline(always)]
fn shl_i16x32(self, a: i16x32<Self>, shift: u32) -> i16x32<Self> {
let (a0, a1) = self.split_i16x32(a);
self.combine_i16x16(self.shl_i16x16(a0, shift), self.shl_i16x16(a1, shift))
}
#[inline(always)]
fn shlv_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
let (a0, a1) = self.split_i16x32(a);
let (b0, b1) = self.split_i16x32(b);
self.combine_i16x16(self.shlv_i16x16(a0, b0), self.shlv_i16x16(a1, b1))
}
#[inline(always)]
fn shr_i16x32(self, a: i16x32<Self>, shift: u32) -> i16x32<Self> {
let (a0, a1) = self.split_i16x32(a);
self.combine_i16x16(self.shr_i16x16(a0, shift), self.shr_i16x16(a1, shift))
}
#[inline(always)]
fn shrv_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
let (a0, a1) = self.split_i16x32(a);
let (b0, b1) = self.split_i16x32(b);
self.combine_i16x16(self.shrv_i16x16(a0, b0), self.shrv_i16x16(a1, b1))
}
#[inline(always)]
fn simd_eq_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> mask16x32<Self> {
let (a0, a1) = self.split_i16x32(a);
let (b0, b1) = self.split_i16x32(b);
self.combine_mask16x16(self.simd_eq_i16x16(a0, b0), self.simd_eq_i16x16(a1, b1))
}
#[inline(always)]
fn simd_lt_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> mask16x32<Self> {
let (a0, a1) = self.split_i16x32(a);
let (b0, b1) = self.split_i16x32(b);
self.combine_mask16x16(self.simd_lt_i16x16(a0, b0), self.simd_lt_i16x16(a1, b1))
}
#[inline(always)]
fn simd_le_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> mask16x32<Self> {
let (a0, a1) = self.split_i16x32(a);
let (b0, b1) = self.split_i16x32(b);
self.combine_mask16x16(self.simd_le_i16x16(a0, b0), self.simd_le_i16x16(a1, b1))
}
#[inline(always)]
fn simd_ge_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> mask16x32<Self> {
let (a0, a1) = self.split_i16x32(a);
let (b0, b1) = self.split_i16x32(b);
self.combine_mask16x16(self.simd_ge_i16x16(a0, b0), self.simd_ge_i16x16(a1, b1))
}
#[inline(always)]
fn simd_gt_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> mask16x32<Self> {
let (a0, a1) = self.split_i16x32(a);
let (b0, b1) = self.split_i16x32(b);
self.combine_mask16x16(self.simd_gt_i16x16(a0, b0), self.simd_gt_i16x16(a1, b1))
}
#[inline(always)]
fn zip_low_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
let (a0, _) = self.split_i16x32(a);
let (b0, _) = self.split_i16x32(b);
self.combine_i16x16(self.zip_low_i16x16(a0, b0), self.zip_high_i16x16(a0, b0))
}
#[inline(always)]
fn zip_high_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
let (_, a1) = self.split_i16x32(a);
let (_, b1) = self.split_i16x32(b);
self.combine_i16x16(self.zip_low_i16x16(a1, b1), self.zip_high_i16x16(a1, b1))
}
#[inline(always)]
fn unzip_low_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
let (a0, a1) = self.split_i16x32(a);
let (b0, b1) = self.split_i16x32(b);
self.combine_i16x16(self.unzip_low_i16x16(a0, a1), self.unzip_low_i16x16(b0, b1))
}
#[inline(always)]
fn unzip_high_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
let (a0, a1) = self.split_i16x32(a);
let (b0, b1) = self.split_i16x32(b);
self.combine_i16x16(
self.unzip_high_i16x16(a0, a1),
self.unzip_high_i16x16(b0, b1),
)
}
#[inline(always)]
fn select_i16x32(self, a: mask16x32<Self>, b: i16x32<Self>, c: i16x32<Self>) -> i16x32<Self> {
let (a0, a1) = self.split_mask16x32(a);
let (b0, b1) = self.split_i16x32(b);
let (c0, c1) = self.split_i16x32(c);
self.combine_i16x16(
self.select_i16x16(a0, b0, c0),
self.select_i16x16(a1, b1, c1),
)
}
#[inline(always)]
fn min_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
let (a0, a1) = self.split_i16x32(a);
let (b0, b1) = self.split_i16x32(b);
self.combine_i16x16(self.min_i16x16(a0, b0), self.min_i16x16(a1, b1))
}
#[inline(always)]
fn max_i16x32(self, a: i16x32<Self>, b: i16x32<Self>) -> i16x32<Self> {
let (a0, a1) = self.split_i16x32(a);
let (b0, b1) = self.split_i16x32(b);
self.combine_i16x16(self.max_i16x16(a0, b0), self.max_i16x16(a1, b1))
}
#[inline(always)]
fn split_i16x32(self, a: i16x32<Self>) -> (i16x16<Self>, i16x16<Self>) {
(
i16x16 {
val: crate::support::Aligned256(int16x8x2_t(a.val.0.0, a.val.0.1)),
simd: self,
},
i16x16 {
val: crate::support::Aligned256(int16x8x2_t(a.val.0.2, a.val.0.3)),
simd: self,
},
)
}
#[inline(always)]
fn neg_i16x32(self, a: i16x32<Self>) -> i16x32<Self> {
let (a0, a1) = self.split_i16x32(a);
self.combine_i16x16(self.neg_i16x16(a0), self.neg_i16x16(a1))
}
#[inline(always)]
fn reinterpret_u8_i16x32(self, a: i16x32<Self>) -> u8x64<Self> {
let (a0, a1) = self.split_i16x32(a);
self.combine_u8x32(
self.reinterpret_u8_i16x16(a0),
self.reinterpret_u8_i16x16(a1),
)
}
#[inline(always)]
fn reinterpret_u32_i16x32(self, a: i16x32<Self>) -> u32x16<Self> {
let (a0, a1) = self.split_i16x32(a);
self.combine_u32x8(
self.reinterpret_u32_i16x16(a0),
self.reinterpret_u32_i16x16(a1),
)
}
#[inline(always)]
fn splat_u16x32(self, val: u16) -> u16x32<Self> {
let half = self.splat_u16x16(val);
self.combine_u16x16(half, half)
}
#[inline(always)]
fn load_array_u16x32(self, val: [u16; 32usize]) -> u16x32<Self> {
u16x32 {
val: unsafe { core::mem::transmute_copy(&val) },
simd: self,
}
}
#[inline(always)]
fn load_array_ref_u16x32(self, val: &[u16; 32usize]) -> u16x32<Self> {
u16x32 {
val: unsafe { core::mem::transmute_copy(val) },
simd: self,
}
}
#[inline(always)]
fn as_array_u16x32(self, a: u16x32<Self>) -> [u16; 32usize] {
unsafe { core::mem::transmute::<uint16x8x4_t, [u16; 32usize]>(a.val.0) }
}
#[inline(always)]
fn as_array_ref_u16x32(self, a: &u16x32<Self>) -> &[u16; 32usize] {
unsafe { core::mem::transmute::<&uint16x8x4_t, &[u16; 32usize]>(&a.val.0) }
}
#[inline(always)]
fn as_array_mut_u16x32(self, a: &mut u16x32<Self>) -> &mut [u16; 32usize] {
unsafe { core::mem::transmute::<&mut uint16x8x4_t, &mut [u16; 32usize]>(&mut a.val.0) }
}
#[inline(always)]
fn store_array_u16x32(self, a: u16x32<Self>, dest: &mut [u16; 32usize]) -> () {
unsafe {
core::ptr::copy_nonoverlapping(
(&raw const a.val.0) as *const u16,
dest.as_mut_ptr(),
32usize,
);
}
}
#[inline(always)]
fn cvt_from_bytes_u16x32(self, a: u8x64<Self>) -> u16x32<Self> {
unsafe {
u16x32 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_u16x32(self, a: u16x32<Self>) -> u8x64<Self> {
unsafe {
u8x64 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn slide_u16x32<const SHIFT: usize>(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
if SHIFT >= 32usize {
return b;
}
let result = unsafe {
let a_bytes = self.cvt_to_bytes_u16x32(a).val.0;
let b_bytes = self.cvt_to_bytes_u16x32(b).val.0;
let a_blocks = [a_bytes.0, a_bytes.1, a_bytes.2, a_bytes.3];
let b_blocks = [b_bytes.0, b_bytes.1, b_bytes.2, b_bytes.3];
let shift_bytes = SHIFT * 2usize;
uint8x16x4_t(
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
0,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
1,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
2,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
3,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
)
};
self.cvt_from_bytes_u16x32(u8x64 {
val: crate::support::Aligned512(result),
simd: self,
})
}
#[inline(always)]
fn slide_within_blocks_u16x32<const SHIFT: usize>(
self,
a: u16x32<Self>,
b: u16x32<Self>,
) -> u16x32<Self> {
let (a0, a1) = self.split_u16x32(a);
let (b0, b1) = self.split_u16x32(b);
self.combine_u16x16(
self.slide_within_blocks_u16x16::<SHIFT>(a0, b0),
self.slide_within_blocks_u16x16::<SHIFT>(a1, b1),
)
}
#[inline(always)]
fn add_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
let (a0, a1) = self.split_u16x32(a);
let (b0, b1) = self.split_u16x32(b);
self.combine_u16x16(self.add_u16x16(a0, b0), self.add_u16x16(a1, b1))
}
#[inline(always)]
fn sub_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
let (a0, a1) = self.split_u16x32(a);
let (b0, b1) = self.split_u16x32(b);
self.combine_u16x16(self.sub_u16x16(a0, b0), self.sub_u16x16(a1, b1))
}
#[inline(always)]
fn mul_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
let (a0, a1) = self.split_u16x32(a);
let (b0, b1) = self.split_u16x32(b);
self.combine_u16x16(self.mul_u16x16(a0, b0), self.mul_u16x16(a1, b1))
}
#[inline(always)]
fn and_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
let (a0, a1) = self.split_u16x32(a);
let (b0, b1) = self.split_u16x32(b);
self.combine_u16x16(self.and_u16x16(a0, b0), self.and_u16x16(a1, b1))
}
#[inline(always)]
fn or_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
let (a0, a1) = self.split_u16x32(a);
let (b0, b1) = self.split_u16x32(b);
self.combine_u16x16(self.or_u16x16(a0, b0), self.or_u16x16(a1, b1))
}
#[inline(always)]
fn xor_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
let (a0, a1) = self.split_u16x32(a);
let (b0, b1) = self.split_u16x32(b);
self.combine_u16x16(self.xor_u16x16(a0, b0), self.xor_u16x16(a1, b1))
}
#[inline(always)]
fn not_u16x32(self, a: u16x32<Self>) -> u16x32<Self> {
let (a0, a1) = self.split_u16x32(a);
self.combine_u16x16(self.not_u16x16(a0), self.not_u16x16(a1))
}
#[inline(always)]
fn shl_u16x32(self, a: u16x32<Self>, shift: u32) -> u16x32<Self> {
let (a0, a1) = self.split_u16x32(a);
self.combine_u16x16(self.shl_u16x16(a0, shift), self.shl_u16x16(a1, shift))
}
#[inline(always)]
fn shlv_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
let (a0, a1) = self.split_u16x32(a);
let (b0, b1) = self.split_u16x32(b);
self.combine_u16x16(self.shlv_u16x16(a0, b0), self.shlv_u16x16(a1, b1))
}
#[inline(always)]
fn shr_u16x32(self, a: u16x32<Self>, shift: u32) -> u16x32<Self> {
let (a0, a1) = self.split_u16x32(a);
self.combine_u16x16(self.shr_u16x16(a0, shift), self.shr_u16x16(a1, shift))
}
#[inline(always)]
fn shrv_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
let (a0, a1) = self.split_u16x32(a);
let (b0, b1) = self.split_u16x32(b);
self.combine_u16x16(self.shrv_u16x16(a0, b0), self.shrv_u16x16(a1, b1))
}
#[inline(always)]
fn simd_eq_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> mask16x32<Self> {
let (a0, a1) = self.split_u16x32(a);
let (b0, b1) = self.split_u16x32(b);
self.combine_mask16x16(self.simd_eq_u16x16(a0, b0), self.simd_eq_u16x16(a1, b1))
}
#[inline(always)]
fn simd_lt_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> mask16x32<Self> {
let (a0, a1) = self.split_u16x32(a);
let (b0, b1) = self.split_u16x32(b);
self.combine_mask16x16(self.simd_lt_u16x16(a0, b0), self.simd_lt_u16x16(a1, b1))
}
#[inline(always)]
fn simd_le_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> mask16x32<Self> {
let (a0, a1) = self.split_u16x32(a);
let (b0, b1) = self.split_u16x32(b);
self.combine_mask16x16(self.simd_le_u16x16(a0, b0), self.simd_le_u16x16(a1, b1))
}
#[inline(always)]
fn simd_ge_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> mask16x32<Self> {
let (a0, a1) = self.split_u16x32(a);
let (b0, b1) = self.split_u16x32(b);
self.combine_mask16x16(self.simd_ge_u16x16(a0, b0), self.simd_ge_u16x16(a1, b1))
}
#[inline(always)]
fn simd_gt_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> mask16x32<Self> {
let (a0, a1) = self.split_u16x32(a);
let (b0, b1) = self.split_u16x32(b);
self.combine_mask16x16(self.simd_gt_u16x16(a0, b0), self.simd_gt_u16x16(a1, b1))
}
#[inline(always)]
fn zip_low_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
let (a0, _) = self.split_u16x32(a);
let (b0, _) = self.split_u16x32(b);
self.combine_u16x16(self.zip_low_u16x16(a0, b0), self.zip_high_u16x16(a0, b0))
}
#[inline(always)]
fn zip_high_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
let (_, a1) = self.split_u16x32(a);
let (_, b1) = self.split_u16x32(b);
self.combine_u16x16(self.zip_low_u16x16(a1, b1), self.zip_high_u16x16(a1, b1))
}
#[inline(always)]
fn unzip_low_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
let (a0, a1) = self.split_u16x32(a);
let (b0, b1) = self.split_u16x32(b);
self.combine_u16x16(self.unzip_low_u16x16(a0, a1), self.unzip_low_u16x16(b0, b1))
}
#[inline(always)]
fn unzip_high_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
let (a0, a1) = self.split_u16x32(a);
let (b0, b1) = self.split_u16x32(b);
self.combine_u16x16(
self.unzip_high_u16x16(a0, a1),
self.unzip_high_u16x16(b0, b1),
)
}
#[inline(always)]
fn select_u16x32(self, a: mask16x32<Self>, b: u16x32<Self>, c: u16x32<Self>) -> u16x32<Self> {
let (a0, a1) = self.split_mask16x32(a);
let (b0, b1) = self.split_u16x32(b);
let (c0, c1) = self.split_u16x32(c);
self.combine_u16x16(
self.select_u16x16(a0, b0, c0),
self.select_u16x16(a1, b1, c1),
)
}
#[inline(always)]
fn min_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
let (a0, a1) = self.split_u16x32(a);
let (b0, b1) = self.split_u16x32(b);
self.combine_u16x16(self.min_u16x16(a0, b0), self.min_u16x16(a1, b1))
}
#[inline(always)]
fn max_u16x32(self, a: u16x32<Self>, b: u16x32<Self>) -> u16x32<Self> {
let (a0, a1) = self.split_u16x32(a);
let (b0, b1) = self.split_u16x32(b);
self.combine_u16x16(self.max_u16x16(a0, b0), self.max_u16x16(a1, b1))
}
#[inline(always)]
fn split_u16x32(self, a: u16x32<Self>) -> (u16x16<Self>, u16x16<Self>) {
(
u16x16 {
val: crate::support::Aligned256(uint16x8x2_t(a.val.0.0, a.val.0.1)),
simd: self,
},
u16x16 {
val: crate::support::Aligned256(uint16x8x2_t(a.val.0.2, a.val.0.3)),
simd: self,
},
)
}
#[inline(always)]
fn load_interleaved_128_u16x32(self, src: &[u16; 32usize]) -> u16x32<Self> {
unsafe { vld4q_u16(src.as_ptr()).simd_into(self) }
}
#[inline(always)]
fn store_interleaved_128_u16x32(self, a: u16x32<Self>, dest: &mut [u16; 32usize]) -> () {
unsafe { vst4q_u16(dest.as_mut_ptr(), a.into()) }
}
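// Narrows each u16 half to u8 lanes via narrow_u16x16 (whose truncating vs.
// saturating behavior is defined by that method, not here) and concatenates
// the two u8x16 results into a u8x32.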
#[inline(always)]
fn narrow_u16x32(self, a: u16x32<Self>) -> u8x32<Self> {
let (a0, a1) = self.split_u16x32(a);
self.combine_u8x16(self.narrow_u16x16(a0), self.narrow_u16x16(a1))
}
#[inline(always)]
fn reinterpret_u8_u16x32(self, a: u16x32<Self>) -> u8x64<Self> {
let (a0, a1) = self.split_u16x32(a);
self.combine_u8x32(
self.reinterpret_u8_u16x16(a0),
self.reinterpret_u8_u16x16(a1),
)
}
#[inline(always)]
fn reinterpret_u32_u16x32(self, a: u16x32<Self>) -> u32x16<Self> {
let (a0, a1) = self.split_u16x32(a);
self.combine_u32x8(
self.reinterpret_u32_u16x16(a0),
self.reinterpret_u32_u16x16(a1),
)
}
#[inline(always)]
fn splat_mask16x32(self, val: i16) -> mask16x32<Self> {
let half = self.splat_mask16x16(val);
self.combine_mask16x16(half, half)
}
#[inline(always)]
fn load_array_mask16x32(self, val: [i16; 32usize]) -> mask16x32<Self> {
mask16x32 {
val: unsafe { core::mem::transmute_copy(&val) },
simd: self,
}
}
#[inline(always)]
fn load_array_ref_mask16x32(self, val: &[i16; 32usize]) -> mask16x32<Self> {
mask16x32 {
val: unsafe { core::mem::transmute_copy(val) },
simd: self,
}
}
#[inline(always)]
fn as_array_mask16x32(self, a: mask16x32<Self>) -> [i16; 32usize] {
unsafe { core::mem::transmute::<int16x8x4_t, [i16; 32usize]>(a.val.0) }
}
#[inline(always)]
fn as_array_ref_mask16x32(self, a: &mask16x32<Self>) -> &[i16; 32usize] {
unsafe { core::mem::transmute::<&int16x8x4_t, &[i16; 32usize]>(&a.val.0) }
}
#[inline(always)]
fn as_array_mut_mask16x32(self, a: &mut mask16x32<Self>) -> &mut [i16; 32usize] {
unsafe { core::mem::transmute::<&mut int16x8x4_t, &mut [i16; 32usize]>(&mut a.val.0) }
}
#[inline(always)]
fn store_array_mask16x32(self, a: mask16x32<Self>, dest: &mut [i16; 32usize]) -> () {
unsafe {
core::ptr::copy_nonoverlapping(
(&raw const a.val.0) as *const i16,
dest.as_mut_ptr(),
32usize,
);
}
}
#[inline(always)]
fn cvt_from_bytes_mask16x32(self, a: u8x64<Self>) -> mask16x32<Self> {
unsafe {
mask16x32 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_mask16x32(self, a: mask16x32<Self>) -> u8x64<Self> {
unsafe {
u8x64 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn slide_mask16x32<const SHIFT: usize>(
self,
a: mask16x32<Self>,
b: mask16x32<Self>,
) -> mask16x32<Self> {
if SHIFT >= 32usize {
return b;
}
let result = unsafe {
let a_bytes = self.cvt_to_bytes_mask16x32(a).val.0;
let b_bytes = self.cvt_to_bytes_mask16x32(b).val.0;
let a_blocks = [a_bytes.0, a_bytes.1, a_bytes.2, a_bytes.3];
let b_blocks = [b_bytes.0, b_bytes.1, b_bytes.2, b_bytes.3];
let shift_bytes = SHIFT * 2usize;
uint8x16x4_t(
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
0,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
1,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
2,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
3,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
)
};
self.cvt_from_bytes_mask16x32(u8x64 {
val: crate::support::Aligned512(result),
simd: self,
})
}
#[inline(always)]
fn slide_within_blocks_mask16x32<const SHIFT: usize>(
self,
a: mask16x32<Self>,
b: mask16x32<Self>,
) -> mask16x32<Self> {
let (a0, a1) = self.split_mask16x32(a);
let (b0, b1) = self.split_mask16x32(b);
self.combine_mask16x16(
self.slide_within_blocks_mask16x16::<SHIFT>(a0, b0),
self.slide_within_blocks_mask16x16::<SHIFT>(a1, b1),
)
}
#[inline(always)]
fn and_mask16x32(self, a: mask16x32<Self>, b: mask16x32<Self>) -> mask16x32<Self> {
let (a0, a1) = self.split_mask16x32(a);
let (b0, b1) = self.split_mask16x32(b);
self.combine_mask16x16(self.and_mask16x16(a0, b0), self.and_mask16x16(a1, b1))
}
#[inline(always)]
fn or_mask16x32(self, a: mask16x32<Self>, b: mask16x32<Self>) -> mask16x32<Self> {
let (a0, a1) = self.split_mask16x32(a);
let (b0, b1) = self.split_mask16x32(b);
self.combine_mask16x16(self.or_mask16x16(a0, b0), self.or_mask16x16(a1, b1))
}
#[inline(always)]
fn xor_mask16x32(self, a: mask16x32<Self>, b: mask16x32<Self>) -> mask16x32<Self> {
let (a0, a1) = self.split_mask16x32(a);
let (b0, b1) = self.split_mask16x32(b);
self.combine_mask16x16(self.xor_mask16x16(a0, b0), self.xor_mask16x16(a1, b1))
}
#[inline(always)]
fn not_mask16x32(self, a: mask16x32<Self>) -> mask16x32<Self> {
let (a0, a1) = self.split_mask16x32(a);
self.combine_mask16x16(self.not_mask16x16(a0), self.not_mask16x16(a1))
}
#[inline(always)]
fn select_mask16x32(
self,
a: mask16x32<Self>,
b: mask16x32<Self>,
c: mask16x32<Self>,
) -> mask16x32<Self> {
let (a0, a1) = self.split_mask16x32(a);
let (b0, b1) = self.split_mask16x32(b);
let (c0, c1) = self.split_mask16x32(c);
self.combine_mask16x16(
self.select_mask16x16(a0, b0, c0),
self.select_mask16x16(a1, b1, c1),
)
}
#[inline(always)]
fn simd_eq_mask16x32(self, a: mask16x32<Self>, b: mask16x32<Self>) -> mask16x32<Self> {
let (a0, a1) = self.split_mask16x32(a);
let (b0, b1) = self.split_mask16x32(b);
self.combine_mask16x16(
self.simd_eq_mask16x16(a0, b0),
self.simd_eq_mask16x16(a1, b1),
)
}
#[inline(always)]
fn any_true_mask16x32(self, a: mask16x32<Self>) -> bool {
let (a0, a1) = self.split_mask16x32(a);
self.any_true_mask16x16(a0) || self.any_true_mask16x16(a1)
}
#[inline(always)]
fn all_true_mask16x32(self, a: mask16x32<Self>) -> bool {
let (a0, a1) = self.split_mask16x32(a);
self.all_true_mask16x16(a0) && self.all_true_mask16x16(a1)
}
#[inline(always)]
fn any_false_mask16x32(self, a: mask16x32<Self>) -> bool {
let (a0, a1) = self.split_mask16x32(a);
self.any_false_mask16x16(a0) || self.any_false_mask16x16(a1)
}
#[inline(always)]
fn all_false_mask16x32(self, a: mask16x32<Self>) -> bool {
let (a0, a1) = self.split_mask16x32(a);
self.all_false_mask16x16(a0) && self.all_false_mask16x16(a1)
}
#[inline(always)]
fn split_mask16x32(self, a: mask16x32<Self>) -> (mask16x16<Self>, mask16x16<Self>) {
(
mask16x16 {
val: crate::support::Aligned256(int16x8x2_t(a.val.0.0, a.val.0.1)),
simd: self,
},
mask16x16 {
val: crate::support::Aligned256(int16x8x2_t(a.val.0.2, a.val.0.3)),
simd: self,
},
)
}
#[inline(always)]
fn splat_i32x16(self, val: i32) -> i32x16<Self> {
let half = self.splat_i32x8(val);
self.combine_i32x8(half, half)
}
#[inline(always)]
fn load_array_i32x16(self, val: [i32; 16usize]) -> i32x16<Self> {
i32x16 {
val: unsafe { core::mem::transmute_copy(&val) },
simd: self,
}
}
#[inline(always)]
fn load_array_ref_i32x16(self, val: &[i32; 16usize]) -> i32x16<Self> {
i32x16 {
val: unsafe { core::mem::transmute_copy(val) },
simd: self,
}
}
#[inline(always)]
fn as_array_i32x16(self, a: i32x16<Self>) -> [i32; 16usize] {
unsafe { core::mem::transmute::<int32x4x4_t, [i32; 16usize]>(a.val.0) }
}
#[inline(always)]
fn as_array_ref_i32x16(self, a: &i32x16<Self>) -> &[i32; 16usize] {
unsafe { core::mem::transmute::<&int32x4x4_t, &[i32; 16usize]>(&a.val.0) }
}
#[inline(always)]
fn as_array_mut_i32x16(self, a: &mut i32x16<Self>) -> &mut [i32; 16usize] {
unsafe { core::mem::transmute::<&mut int32x4x4_t, &mut [i32; 16usize]>(&mut a.val.0) }
}
#[inline(always)]
fn store_array_i32x16(self, a: i32x16<Self>, dest: &mut [i32; 16usize]) -> () {
unsafe {
core::ptr::copy_nonoverlapping(
(&raw const a.val.0) as *const i32,
dest.as_mut_ptr(),
16usize,
);
}
}
#[inline(always)]
fn cvt_from_bytes_i32x16(self, a: u8x64<Self>) -> i32x16<Self> {
unsafe {
i32x16 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_i32x16(self, a: i32x16<Self>) -> u8x64<Self> {
unsafe {
u8x64 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn slide_i32x16<const SHIFT: usize>(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
if SHIFT >= 16usize {
return b;
}
let result = unsafe {
let a_bytes = self.cvt_to_bytes_i32x16(a).val.0;
let b_bytes = self.cvt_to_bytes_i32x16(b).val.0;
let a_blocks = [a_bytes.0, a_bytes.1, a_bytes.2, a_bytes.3];
let b_blocks = [b_bytes.0, b_bytes.1, b_bytes.2, b_bytes.3];
let shift_bytes = SHIFT * 4usize;
uint8x16x4_t(
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
0,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
1,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
2,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
3,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
)
};
self.cvt_from_bytes_i32x16(u8x64 {
val: crate::support::Aligned512(result),
simd: self,
})
}
#[inline(always)]
fn slide_within_blocks_i32x16<const SHIFT: usize>(
self,
a: i32x16<Self>,
b: i32x16<Self>,
) -> i32x16<Self> {
let (a0, a1) = self.split_i32x16(a);
let (b0, b1) = self.split_i32x16(b);
self.combine_i32x8(
self.slide_within_blocks_i32x8::<SHIFT>(a0, b0),
self.slide_within_blocks_i32x8::<SHIFT>(a1, b1),
)
}
#[inline(always)]
fn add_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
let (a0, a1) = self.split_i32x16(a);
let (b0, b1) = self.split_i32x16(b);
self.combine_i32x8(self.add_i32x8(a0, b0), self.add_i32x8(a1, b1))
}
#[inline(always)]
fn sub_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
let (a0, a1) = self.split_i32x16(a);
let (b0, b1) = self.split_i32x16(b);
self.combine_i32x8(self.sub_i32x8(a0, b0), self.sub_i32x8(a1, b1))
}
#[inline(always)]
fn mul_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
let (a0, a1) = self.split_i32x16(a);
let (b0, b1) = self.split_i32x16(b);
self.combine_i32x8(self.mul_i32x8(a0, b0), self.mul_i32x8(a1, b1))
}
#[inline(always)]
fn and_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
let (a0, a1) = self.split_i32x16(a);
let (b0, b1) = self.split_i32x16(b);
self.combine_i32x8(self.and_i32x8(a0, b0), self.and_i32x8(a1, b1))
}
#[inline(always)]
fn or_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
let (a0, a1) = self.split_i32x16(a);
let (b0, b1) = self.split_i32x16(b);
self.combine_i32x8(self.or_i32x8(a0, b0), self.or_i32x8(a1, b1))
}
#[inline(always)]
fn xor_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
let (a0, a1) = self.split_i32x16(a);
let (b0, b1) = self.split_i32x16(b);
self.combine_i32x8(self.xor_i32x8(a0, b0), self.xor_i32x8(a1, b1))
}
#[inline(always)]
fn not_i32x16(self, a: i32x16<Self>) -> i32x16<Self> {
let (a0, a1) = self.split_i32x16(a);
self.combine_i32x8(self.not_i32x8(a0), self.not_i32x8(a1))
}
#[inline(always)]
fn shl_i32x16(self, a: i32x16<Self>, shift: u32) -> i32x16<Self> {
let (a0, a1) = self.split_i32x16(a);
self.combine_i32x8(self.shl_i32x8(a0, shift), self.shl_i32x8(a1, shift))
}
#[inline(always)]
fn shlv_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
let (a0, a1) = self.split_i32x16(a);
let (b0, b1) = self.split_i32x16(b);
self.combine_i32x8(self.shlv_i32x8(a0, b0), self.shlv_i32x8(a1, b1))
}
#[inline(always)]
fn shr_i32x16(self, a: i32x16<Self>, shift: u32) -> i32x16<Self> {
let (a0, a1) = self.split_i32x16(a);
self.combine_i32x8(self.shr_i32x8(a0, shift), self.shr_i32x8(a1, shift))
}
#[inline(always)]
fn shrv_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
let (a0, a1) = self.split_i32x16(a);
let (b0, b1) = self.split_i32x16(b);
self.combine_i32x8(self.shrv_i32x8(a0, b0), self.shrv_i32x8(a1, b1))
}
#[inline(always)]
fn simd_eq_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> mask32x16<Self> {
let (a0, a1) = self.split_i32x16(a);
let (b0, b1) = self.split_i32x16(b);
self.combine_mask32x8(self.simd_eq_i32x8(a0, b0), self.simd_eq_i32x8(a1, b1))
}
#[inline(always)]
fn simd_lt_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> mask32x16<Self> {
let (a0, a1) = self.split_i32x16(a);
let (b0, b1) = self.split_i32x16(b);
self.combine_mask32x8(self.simd_lt_i32x8(a0, b0), self.simd_lt_i32x8(a1, b1))
}
#[inline(always)]
fn simd_le_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> mask32x16<Self> {
let (a0, a1) = self.split_i32x16(a);
let (b0, b1) = self.split_i32x16(b);
self.combine_mask32x8(self.simd_le_i32x8(a0, b0), self.simd_le_i32x8(a1, b1))
}
#[inline(always)]
fn simd_ge_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> mask32x16<Self> {
let (a0, a1) = self.split_i32x16(a);
let (b0, b1) = self.split_i32x16(b);
self.combine_mask32x8(self.simd_ge_i32x8(a0, b0), self.simd_ge_i32x8(a1, b1))
}
#[inline(always)]
fn simd_gt_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> mask32x16<Self> {
let (a0, a1) = self.split_i32x16(a);
let (b0, b1) = self.split_i32x16(b);
self.combine_mask32x8(self.simd_gt_i32x8(a0, b0), self.simd_gt_i32x8(a1, b1))
}
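// Zipping the low halves of `a` and `b` already produces all 16 output lanes, so the
// high halves of the inputs are not needed here (and vice versa for zip_high below).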
#[inline(always)]
fn zip_low_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
let (a0, _) = self.split_i32x16(a);
let (b0, _) = self.split_i32x16(b);
self.combine_i32x8(self.zip_low_i32x8(a0, b0), self.zip_high_i32x8(a0, b0))
}
#[inline(always)]
fn zip_high_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
let (_, a1) = self.split_i32x16(a);
let (_, b1) = self.split_i32x16(b);
self.combine_i32x8(self.zip_low_i32x8(a1, b1), self.zip_high_i32x8(a1, b1))
}
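// Unzip gathers the even-indexed (unzip_low) or odd-indexed (unzip_high) lanes of the
// concatenation `a ++ b`, so each operand's two halves are unzipped against each other
// and the two results are concatenated.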
#[inline(always)]
fn unzip_low_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
let (a0, a1) = self.split_i32x16(a);
let (b0, b1) = self.split_i32x16(b);
self.combine_i32x8(self.unzip_low_i32x8(a0, a1), self.unzip_low_i32x8(b0, b1))
}
#[inline(always)]
fn unzip_high_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
let (a0, a1) = self.split_i32x16(a);
let (b0, b1) = self.split_i32x16(b);
self.combine_i32x8(self.unzip_high_i32x8(a0, a1), self.unzip_high_i32x8(b0, b1))
}
#[inline(always)]
fn select_i32x16(self, a: mask32x16<Self>, b: i32x16<Self>, c: i32x16<Self>) -> i32x16<Self> {
let (a0, a1) = self.split_mask32x16(a);
let (b0, b1) = self.split_i32x16(b);
let (c0, c1) = self.split_i32x16(c);
self.combine_i32x8(self.select_i32x8(a0, b0, c0), self.select_i32x8(a1, b1, c1))
}
#[inline(always)]
fn min_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
let (a0, a1) = self.split_i32x16(a);
let (b0, b1) = self.split_i32x16(b);
self.combine_i32x8(self.min_i32x8(a0, b0), self.min_i32x8(a1, b1))
}
#[inline(always)]
fn max_i32x16(self, a: i32x16<Self>, b: i32x16<Self>) -> i32x16<Self> {
let (a0, a1) = self.split_i32x16(a);
let (b0, b1) = self.split_i32x16(b);
self.combine_i32x8(self.max_i32x8(a0, b0), self.max_i32x8(a1, b1))
}
#[inline(always)]
fn split_i32x16(self, a: i32x16<Self>) -> (i32x8<Self>, i32x8<Self>) {
(
i32x8 {
val: crate::support::Aligned256(int32x4x2_t(a.val.0.0, a.val.0.1)),
simd: self,
},
i32x8 {
val: crate::support::Aligned256(int32x4x2_t(a.val.0.2, a.val.0.3)),
simd: self,
},
)
}
#[inline(always)]
fn neg_i32x16(self, a: i32x16<Self>) -> i32x16<Self> {
let (a0, a1) = self.split_i32x16(a);
self.combine_i32x8(self.neg_i32x8(a0), self.neg_i32x8(a1))
}
#[inline(always)]
fn reinterpret_u8_i32x16(self, a: i32x16<Self>) -> u8x64<Self> {
let (a0, a1) = self.split_i32x16(a);
self.combine_u8x32(self.reinterpret_u8_i32x8(a0), self.reinterpret_u8_i32x8(a1))
}
#[inline(always)]
fn reinterpret_u32_i32x16(self, a: i32x16<Self>) -> u32x16<Self> {
let (a0, a1) = self.split_i32x16(a);
self.combine_u32x8(
self.reinterpret_u32_i32x8(a0),
self.reinterpret_u32_i32x8(a1),
)
}
#[inline(always)]
fn cvt_f32_i32x16(self, a: i32x16<Self>) -> f32x16<Self> {
let (a0, a1) = self.split_i32x16(a);
self.combine_f32x8(self.cvt_f32_i32x8(a0), self.cvt_f32_i32x8(a1))
}
#[inline(always)]
fn splat_u32x16(self, val: u32) -> u32x16<Self> {
let half = self.splat_u32x8(val);
self.combine_u32x8(half, half)
}
#[inline(always)]
fn load_array_u32x16(self, val: [u32; 16usize]) -> u32x16<Self> {
u32x16 {
val: unsafe { core::mem::transmute_copy(&val) },
simd: self,
}
}
#[inline(always)]
fn load_array_ref_u32x16(self, val: &[u32; 16usize]) -> u32x16<Self> {
u32x16 {
val: unsafe { core::mem::transmute_copy(val) },
simd: self,
}
}
#[inline(always)]
fn as_array_u32x16(self, a: u32x16<Self>) -> [u32; 16usize] {
unsafe { core::mem::transmute::<uint32x4x4_t, [u32; 16usize]>(a.val.0) }
}
#[inline(always)]
fn as_array_ref_u32x16(self, a: &u32x16<Self>) -> &[u32; 16usize] {
unsafe { core::mem::transmute::<&uint32x4x4_t, &[u32; 16usize]>(&a.val.0) }
}
#[inline(always)]
fn as_array_mut_u32x16(self, a: &mut u32x16<Self>) -> &mut [u32; 16usize] {
unsafe { core::mem::transmute::<&mut uint32x4x4_t, &mut [u32; 16usize]>(&mut a.val.0) }
}
#[inline(always)]
fn store_array_u32x16(self, a: u32x16<Self>, dest: &mut [u32; 16usize]) -> () {
unsafe {
core::ptr::copy_nonoverlapping(
(&raw const a.val.0) as *const u32,
dest.as_mut_ptr(),
16usize,
);
}
}
#[inline(always)]
fn cvt_from_bytes_u32x16(self, a: u8x64<Self>) -> u32x16<Self> {
unsafe {
u32x16 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_u32x16(self, a: u32x16<Self>) -> u8x64<Self> {
unsafe {
u8x64 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn slide_u32x16<const SHIFT: usize>(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
if SHIFT >= 16usize {
return b;
}
let result = unsafe {
let a_bytes = self.cvt_to_bytes_u32x16(a).val.0;
let b_bytes = self.cvt_to_bytes_u32x16(b).val.0;
let a_blocks = [a_bytes.0, a_bytes.1, a_bytes.2, a_bytes.3];
let b_blocks = [b_bytes.0, b_bytes.1, b_bytes.2, b_bytes.3];
let shift_bytes = SHIFT * 4usize;
uint8x16x4_t(
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
0,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
1,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
2,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
3,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
)
};
self.cvt_from_bytes_u32x16(u8x64 {
val: crate::support::Aligned512(result),
simd: self,
})
}
#[inline(always)]
fn slide_within_blocks_u32x16<const SHIFT: usize>(
self,
a: u32x16<Self>,
b: u32x16<Self>,
) -> u32x16<Self> {
let (a0, a1) = self.split_u32x16(a);
let (b0, b1) = self.split_u32x16(b);
self.combine_u32x8(
self.slide_within_blocks_u32x8::<SHIFT>(a0, b0),
self.slide_within_blocks_u32x8::<SHIFT>(a1, b1),
)
}
#[inline(always)]
fn add_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
let (a0, a1) = self.split_u32x16(a);
let (b0, b1) = self.split_u32x16(b);
self.combine_u32x8(self.add_u32x8(a0, b0), self.add_u32x8(a1, b1))
}
#[inline(always)]
fn sub_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
let (a0, a1) = self.split_u32x16(a);
let (b0, b1) = self.split_u32x16(b);
self.combine_u32x8(self.sub_u32x8(a0, b0), self.sub_u32x8(a1, b1))
}
#[inline(always)]
fn mul_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
let (a0, a1) = self.split_u32x16(a);
let (b0, b1) = self.split_u32x16(b);
self.combine_u32x8(self.mul_u32x8(a0, b0), self.mul_u32x8(a1, b1))
}
#[inline(always)]
fn and_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
let (a0, a1) = self.split_u32x16(a);
let (b0, b1) = self.split_u32x16(b);
self.combine_u32x8(self.and_u32x8(a0, b0), self.and_u32x8(a1, b1))
}
#[inline(always)]
fn or_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
let (a0, a1) = self.split_u32x16(a);
let (b0, b1) = self.split_u32x16(b);
self.combine_u32x8(self.or_u32x8(a0, b0), self.or_u32x8(a1, b1))
}
#[inline(always)]
fn xor_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
let (a0, a1) = self.split_u32x16(a);
let (b0, b1) = self.split_u32x16(b);
self.combine_u32x8(self.xor_u32x8(a0, b0), self.xor_u32x8(a1, b1))
}
#[inline(always)]
fn not_u32x16(self, a: u32x16<Self>) -> u32x16<Self> {
let (a0, a1) = self.split_u32x16(a);
self.combine_u32x8(self.not_u32x8(a0), self.not_u32x8(a1))
}
#[inline(always)]
fn shl_u32x16(self, a: u32x16<Self>, shift: u32) -> u32x16<Self> {
let (a0, a1) = self.split_u32x16(a);
self.combine_u32x8(self.shl_u32x8(a0, shift), self.shl_u32x8(a1, shift))
}
#[inline(always)]
fn shlv_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
let (a0, a1) = self.split_u32x16(a);
let (b0, b1) = self.split_u32x16(b);
self.combine_u32x8(self.shlv_u32x8(a0, b0), self.shlv_u32x8(a1, b1))
}
#[inline(always)]
fn shr_u32x16(self, a: u32x16<Self>, shift: u32) -> u32x16<Self> {
let (a0, a1) = self.split_u32x16(a);
self.combine_u32x8(self.shr_u32x8(a0, shift), self.shr_u32x8(a1, shift))
}
#[inline(always)]
fn shrv_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
let (a0, a1) = self.split_u32x16(a);
let (b0, b1) = self.split_u32x16(b);
self.combine_u32x8(self.shrv_u32x8(a0, b0), self.shrv_u32x8(a1, b1))
}
#[inline(always)]
fn simd_eq_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> mask32x16<Self> {
let (a0, a1) = self.split_u32x16(a);
let (b0, b1) = self.split_u32x16(b);
self.combine_mask32x8(self.simd_eq_u32x8(a0, b0), self.simd_eq_u32x8(a1, b1))
}
#[inline(always)]
fn simd_lt_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> mask32x16<Self> {
let (a0, a1) = self.split_u32x16(a);
let (b0, b1) = self.split_u32x16(b);
self.combine_mask32x8(self.simd_lt_u32x8(a0, b0), self.simd_lt_u32x8(a1, b1))
}
#[inline(always)]
fn simd_le_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> mask32x16<Self> {
let (a0, a1) = self.split_u32x16(a);
let (b0, b1) = self.split_u32x16(b);
self.combine_mask32x8(self.simd_le_u32x8(a0, b0), self.simd_le_u32x8(a1, b1))
}
#[inline(always)]
fn simd_ge_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> mask32x16<Self> {
let (a0, a1) = self.split_u32x16(a);
let (b0, b1) = self.split_u32x16(b);
self.combine_mask32x8(self.simd_ge_u32x8(a0, b0), self.simd_ge_u32x8(a1, b1))
}
#[inline(always)]
fn simd_gt_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> mask32x16<Self> {
let (a0, a1) = self.split_u32x16(a);
let (b0, b1) = self.split_u32x16(b);
self.combine_mask32x8(self.simd_gt_u32x8(a0, b0), self.simd_gt_u32x8(a1, b1))
}
#[inline(always)]
fn zip_low_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
let (a0, _) = self.split_u32x16(a);
let (b0, _) = self.split_u32x16(b);
self.combine_u32x8(self.zip_low_u32x8(a0, b0), self.zip_high_u32x8(a0, b0))
}
#[inline(always)]
fn zip_high_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
let (_, a1) = self.split_u32x16(a);
let (_, b1) = self.split_u32x16(b);
self.combine_u32x8(self.zip_low_u32x8(a1, b1), self.zip_high_u32x8(a1, b1))
}
#[inline(always)]
fn unzip_low_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
let (a0, a1) = self.split_u32x16(a);
let (b0, b1) = self.split_u32x16(b);
self.combine_u32x8(self.unzip_low_u32x8(a0, a1), self.unzip_low_u32x8(b0, b1))
}
#[inline(always)]
fn unzip_high_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
let (a0, a1) = self.split_u32x16(a);
let (b0, b1) = self.split_u32x16(b);
self.combine_u32x8(self.unzip_high_u32x8(a0, a1), self.unzip_high_u32x8(b0, b1))
}
#[inline(always)]
fn select_u32x16(self, a: mask32x16<Self>, b: u32x16<Self>, c: u32x16<Self>) -> u32x16<Self> {
let (a0, a1) = self.split_mask32x16(a);
let (b0, b1) = self.split_u32x16(b);
let (c0, c1) = self.split_u32x16(c);
self.combine_u32x8(self.select_u32x8(a0, b0, c0), self.select_u32x8(a1, b1, c1))
}
#[inline(always)]
fn min_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
let (a0, a1) = self.split_u32x16(a);
let (b0, b1) = self.split_u32x16(b);
self.combine_u32x8(self.min_u32x8(a0, b0), self.min_u32x8(a1, b1))
}
#[inline(always)]
fn max_u32x16(self, a: u32x16<Self>, b: u32x16<Self>) -> u32x16<Self> {
let (a0, a1) = self.split_u32x16(a);
let (b0, b1) = self.split_u32x16(b);
self.combine_u32x8(self.max_u32x8(a0, b0), self.max_u32x8(a1, b1))
}
#[inline(always)]
fn split_u32x16(self, a: u32x16<Self>) -> (u32x8<Self>, u32x8<Self>) {
(
u32x8 {
val: crate::support::Aligned256(uint32x4x2_t(a.val.0.0, a.val.0.1)),
simd: self,
},
u32x8 {
val: crate::support::Aligned256(uint32x4x2_t(a.val.0.2, a.val.0.3)),
simd: self,
},
)
}
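// `vld4q_u32` performs a de-interleaving structure load: lane `j` of output block `k`
// receives `src[4 * j + k]`. `vst4q_u32` is the matching re-interleaving store.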
#[inline(always)]
fn load_interleaved_128_u32x16(self, src: &[u32; 16usize]) -> u32x16<Self> {
unsafe { vld4q_u32(src.as_ptr()).simd_into(self) }
}
#[inline(always)]
fn store_interleaved_128_u32x16(self, a: u32x16<Self>, dest: &mut [u32; 16usize]) -> () {
unsafe { vst4q_u32(dest.as_mut_ptr(), a.into()) }
}
#[inline(always)]
fn reinterpret_u8_u32x16(self, a: u32x16<Self>) -> u8x64<Self> {
let (a0, a1) = self.split_u32x16(a);
self.combine_u8x32(self.reinterpret_u8_u32x8(a0), self.reinterpret_u8_u32x8(a1))
}
#[inline(always)]
fn cvt_f32_u32x16(self, a: u32x16<Self>) -> f32x16<Self> {
let (a0, a1) = self.split_u32x16(a);
self.combine_f32x8(self.cvt_f32_u32x8(a0), self.cvt_f32_u32x8(a1))
}
#[inline(always)]
fn splat_mask32x16(self, val: i32) -> mask32x16<Self> {
let half = self.splat_mask32x8(val);
self.combine_mask32x8(half, half)
}
#[inline(always)]
fn load_array_mask32x16(self, val: [i32; 16usize]) -> mask32x16<Self> {
mask32x16 {
val: unsafe { core::mem::transmute_copy(&val) },
simd: self,
}
}
#[inline(always)]
fn load_array_ref_mask32x16(self, val: &[i32; 16usize]) -> mask32x16<Self> {
mask32x16 {
val: unsafe { core::mem::transmute_copy(val) },
simd: self,
}
}
#[inline(always)]
fn as_array_mask32x16(self, a: mask32x16<Self>) -> [i32; 16usize] {
unsafe { core::mem::transmute::<int32x4x4_t, [i32; 16usize]>(a.val.0) }
}
#[inline(always)]
fn as_array_ref_mask32x16(self, a: &mask32x16<Self>) -> &[i32; 16usize] {
unsafe { core::mem::transmute::<&int32x4x4_t, &[i32; 16usize]>(&a.val.0) }
}
#[inline(always)]
fn as_array_mut_mask32x16(self, a: &mut mask32x16<Self>) -> &mut [i32; 16usize] {
unsafe { core::mem::transmute::<&mut int32x4x4_t, &mut [i32; 16usize]>(&mut a.val.0) }
}
#[inline(always)]
fn store_array_mask32x16(self, a: mask32x16<Self>, dest: &mut [i32; 16usize]) -> () {
unsafe {
core::ptr::copy_nonoverlapping(
(&raw const a.val.0) as *const i32,
dest.as_mut_ptr(),
16usize,
);
}
}
#[inline(always)]
fn cvt_from_bytes_mask32x16(self, a: u8x64<Self>) -> mask32x16<Self> {
unsafe {
mask32x16 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_mask32x16(self, a: mask32x16<Self>) -> u8x64<Self> {
unsafe {
u8x64 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn slide_mask32x16<const SHIFT: usize>(
self,
a: mask32x16<Self>,
b: mask32x16<Self>,
) -> mask32x16<Self> {
if SHIFT >= 16usize {
return b;
}
let result = unsafe {
let a_bytes = self.cvt_to_bytes_mask32x16(a).val.0;
let b_bytes = self.cvt_to_bytes_mask32x16(b).val.0;
let a_blocks = [a_bytes.0, a_bytes.1, a_bytes.2, a_bytes.3];
let b_blocks = [b_bytes.0, b_bytes.1, b_bytes.2, b_bytes.3];
let shift_bytes = SHIFT * 4usize;
uint8x16x4_t(
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
0,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
1,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
2,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
3,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
)
};
self.cvt_from_bytes_mask32x16(u8x64 {
val: crate::support::Aligned512(result),
simd: self,
})
}
#[inline(always)]
fn slide_within_blocks_mask32x16<const SHIFT: usize>(
self,
a: mask32x16<Self>,
b: mask32x16<Self>,
) -> mask32x16<Self> {
let (a0, a1) = self.split_mask32x16(a);
let (b0, b1) = self.split_mask32x16(b);
self.combine_mask32x8(
self.slide_within_blocks_mask32x8::<SHIFT>(a0, b0),
self.slide_within_blocks_mask32x8::<SHIFT>(a1, b1),
)
}
#[inline(always)]
fn and_mask32x16(self, a: mask32x16<Self>, b: mask32x16<Self>) -> mask32x16<Self> {
let (a0, a1) = self.split_mask32x16(a);
let (b0, b1) = self.split_mask32x16(b);
self.combine_mask32x8(self.and_mask32x8(a0, b0), self.and_mask32x8(a1, b1))
}
#[inline(always)]
fn or_mask32x16(self, a: mask32x16<Self>, b: mask32x16<Self>) -> mask32x16<Self> {
let (a0, a1) = self.split_mask32x16(a);
let (b0, b1) = self.split_mask32x16(b);
self.combine_mask32x8(self.or_mask32x8(a0, b0), self.or_mask32x8(a1, b1))
}
#[inline(always)]
fn xor_mask32x16(self, a: mask32x16<Self>, b: mask32x16<Self>) -> mask32x16<Self> {
let (a0, a1) = self.split_mask32x16(a);
let (b0, b1) = self.split_mask32x16(b);
self.combine_mask32x8(self.xor_mask32x8(a0, b0), self.xor_mask32x8(a1, b1))
}
#[inline(always)]
fn not_mask32x16(self, a: mask32x16<Self>) -> mask32x16<Self> {
let (a0, a1) = self.split_mask32x16(a);
self.combine_mask32x8(self.not_mask32x8(a0), self.not_mask32x8(a1))
}
#[inline(always)]
fn select_mask32x16(
self,
a: mask32x16<Self>,
b: mask32x16<Self>,
c: mask32x16<Self>,
) -> mask32x16<Self> {
let (a0, a1) = self.split_mask32x16(a);
let (b0, b1) = self.split_mask32x16(b);
let (c0, c1) = self.split_mask32x16(c);
self.combine_mask32x8(
self.select_mask32x8(a0, b0, c0),
self.select_mask32x8(a1, b1, c1),
)
}
#[inline(always)]
fn simd_eq_mask32x16(self, a: mask32x16<Self>, b: mask32x16<Self>) -> mask32x16<Self> {
let (a0, a1) = self.split_mask32x16(a);
let (b0, b1) = self.split_mask32x16(b);
self.combine_mask32x8(self.simd_eq_mask32x8(a0, b0), self.simd_eq_mask32x8(a1, b1))
}
#[inline(always)]
fn any_true_mask32x16(self, a: mask32x16<Self>) -> bool {
let (a0, a1) = self.split_mask32x16(a);
self.any_true_mask32x8(a0) || self.any_true_mask32x8(a1)
}
#[inline(always)]
fn all_true_mask32x16(self, a: mask32x16<Self>) -> bool {
let (a0, a1) = self.split_mask32x16(a);
self.all_true_mask32x8(a0) && self.all_true_mask32x8(a1)
}
#[inline(always)]
fn any_false_mask32x16(self, a: mask32x16<Self>) -> bool {
let (a0, a1) = self.split_mask32x16(a);
self.any_false_mask32x8(a0) || self.any_false_mask32x8(a1)
}
#[inline(always)]
fn all_false_mask32x16(self, a: mask32x16<Self>) -> bool {
let (a0, a1) = self.split_mask32x16(a);
self.all_false_mask32x8(a0) && self.all_false_mask32x8(a1)
}
#[inline(always)]
fn split_mask32x16(self, a: mask32x16<Self>) -> (mask32x8<Self>, mask32x8<Self>) {
(
mask32x8 {
val: crate::support::Aligned256(int32x4x2_t(a.val.0.0, a.val.0.1)),
simd: self,
},
mask32x8 {
val: crate::support::Aligned256(int32x4x2_t(a.val.0.2, a.val.0.3)),
simd: self,
},
)
}
#[inline(always)]
fn splat_f64x8(self, val: f64) -> f64x8<Self> {
let half = self.splat_f64x4(val);
self.combine_f64x4(half, half)
}
#[inline(always)]
fn load_array_f64x8(self, val: [f64; 8usize]) -> f64x8<Self> {
f64x8 {
val: unsafe { core::mem::transmute_copy(&val) },
simd: self,
}
}
#[inline(always)]
fn load_array_ref_f64x8(self, val: &[f64; 8usize]) -> f64x8<Self> {
f64x8 {
val: unsafe { core::mem::transmute_copy(val) },
simd: self,
}
}
#[inline(always)]
fn as_array_f64x8(self, a: f64x8<Self>) -> [f64; 8usize] {
unsafe { core::mem::transmute::<float64x2x4_t, [f64; 8usize]>(a.val.0) }
}
#[inline(always)]
fn as_array_ref_f64x8(self, a: &f64x8<Self>) -> &[f64; 8usize] {
unsafe { core::mem::transmute::<&float64x2x4_t, &[f64; 8usize]>(&a.val.0) }
}
#[inline(always)]
fn as_array_mut_f64x8(self, a: &mut f64x8<Self>) -> &mut [f64; 8usize] {
unsafe { core::mem::transmute::<&mut float64x2x4_t, &mut [f64; 8usize]>(&mut a.val.0) }
}
#[inline(always)]
fn store_array_f64x8(self, a: f64x8<Self>, dest: &mut [f64; 8usize]) -> () {
unsafe {
core::ptr::copy_nonoverlapping(
(&raw const a.val.0) as *const f64,
dest.as_mut_ptr(),
8usize,
);
}
}
#[inline(always)]
fn cvt_from_bytes_f64x8(self, a: u8x64<Self>) -> f64x8<Self> {
unsafe {
f64x8 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_f64x8(self, a: f64x8<Self>) -> u8x64<Self> {
unsafe {
u8x64 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn slide_f64x8<const SHIFT: usize>(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
if SHIFT >= 8usize {
return b;
}
let result = unsafe {
let a_bytes = self.cvt_to_bytes_f64x8(a).val.0;
let b_bytes = self.cvt_to_bytes_f64x8(b).val.0;
let a_blocks = [a_bytes.0, a_bytes.1, a_bytes.2, a_bytes.3];
let b_blocks = [b_bytes.0, b_bytes.1, b_bytes.2, b_bytes.3];
let shift_bytes = SHIFT * 8usize;
uint8x16x4_t(
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
0,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
1,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
2,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
3,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
)
};
self.cvt_from_bytes_f64x8(u8x64 {
val: crate::support::Aligned512(result),
simd: self,
})
}
#[inline(always)]
fn slide_within_blocks_f64x8<const SHIFT: usize>(
self,
a: f64x8<Self>,
b: f64x8<Self>,
) -> f64x8<Self> {
let (a0, a1) = self.split_f64x8(a);
let (b0, b1) = self.split_f64x8(b);
self.combine_f64x4(
self.slide_within_blocks_f64x4::<SHIFT>(a0, b0),
self.slide_within_blocks_f64x4::<SHIFT>(a1, b1),
)
}
#[inline(always)]
fn abs_f64x8(self, a: f64x8<Self>) -> f64x8<Self> {
let (a0, a1) = self.split_f64x8(a);
self.combine_f64x4(self.abs_f64x4(a0), self.abs_f64x4(a1))
}
#[inline(always)]
fn neg_f64x8(self, a: f64x8<Self>) -> f64x8<Self> {
let (a0, a1) = self.split_f64x8(a);
self.combine_f64x4(self.neg_f64x4(a0), self.neg_f64x4(a1))
}
#[inline(always)]
fn sqrt_f64x8(self, a: f64x8<Self>) -> f64x8<Self> {
let (a0, a1) = self.split_f64x8(a);
self.combine_f64x4(self.sqrt_f64x4(a0), self.sqrt_f64x4(a1))
}
#[inline(always)]
fn add_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
let (a0, a1) = self.split_f64x8(a);
let (b0, b1) = self.split_f64x8(b);
self.combine_f64x4(self.add_f64x4(a0, b0), self.add_f64x4(a1, b1))
}
#[inline(always)]
fn sub_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
let (a0, a1) = self.split_f64x8(a);
let (b0, b1) = self.split_f64x8(b);
self.combine_f64x4(self.sub_f64x4(a0, b0), self.sub_f64x4(a1, b1))
}
#[inline(always)]
fn mul_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
let (a0, a1) = self.split_f64x8(a);
let (b0, b1) = self.split_f64x8(b);
self.combine_f64x4(self.mul_f64x4(a0, b0), self.mul_f64x4(a1, b1))
}
#[inline(always)]
fn div_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
let (a0, a1) = self.split_f64x8(a);
let (b0, b1) = self.split_f64x8(b);
self.combine_f64x4(self.div_f64x4(a0, b0), self.div_f64x4(a1, b1))
}
#[inline(always)]
fn copysign_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
let (a0, a1) = self.split_f64x8(a);
let (b0, b1) = self.split_f64x8(b);
self.combine_f64x4(self.copysign_f64x4(a0, b0), self.copysign_f64x4(a1, b1))
}
#[inline(always)]
fn simd_eq_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> mask64x8<Self> {
let (a0, a1) = self.split_f64x8(a);
let (b0, b1) = self.split_f64x8(b);
self.combine_mask64x4(self.simd_eq_f64x4(a0, b0), self.simd_eq_f64x4(a1, b1))
}
#[inline(always)]
fn simd_lt_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> mask64x8<Self> {
let (a0, a1) = self.split_f64x8(a);
let (b0, b1) = self.split_f64x8(b);
self.combine_mask64x4(self.simd_lt_f64x4(a0, b0), self.simd_lt_f64x4(a1, b1))
}
#[inline(always)]
fn simd_le_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> mask64x8<Self> {
let (a0, a1) = self.split_f64x8(a);
let (b0, b1) = self.split_f64x8(b);
self.combine_mask64x4(self.simd_le_f64x4(a0, b0), self.simd_le_f64x4(a1, b1))
}
#[inline(always)]
fn simd_ge_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> mask64x8<Self> {
let (a0, a1) = self.split_f64x8(a);
let (b0, b1) = self.split_f64x8(b);
self.combine_mask64x4(self.simd_ge_f64x4(a0, b0), self.simd_ge_f64x4(a1, b1))
}
#[inline(always)]
fn simd_gt_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> mask64x8<Self> {
let (a0, a1) = self.split_f64x8(a);
let (b0, b1) = self.split_f64x8(b);
self.combine_mask64x4(self.simd_gt_f64x4(a0, b0), self.simd_gt_f64x4(a1, b1))
}
#[inline(always)]
fn zip_low_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
let (a0, _) = self.split_f64x8(a);
let (b0, _) = self.split_f64x8(b);
self.combine_f64x4(self.zip_low_f64x4(a0, b0), self.zip_high_f64x4(a0, b0))
}
#[inline(always)]
fn zip_high_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
let (_, a1) = self.split_f64x8(a);
let (_, b1) = self.split_f64x8(b);
self.combine_f64x4(self.zip_low_f64x4(a1, b1), self.zip_high_f64x4(a1, b1))
}
#[inline(always)]
fn unzip_low_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
let (a0, a1) = self.split_f64x8(a);
let (b0, b1) = self.split_f64x8(b);
self.combine_f64x4(self.unzip_low_f64x4(a0, a1), self.unzip_low_f64x4(b0, b1))
}
#[inline(always)]
fn unzip_high_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
let (a0, a1) = self.split_f64x8(a);
let (b0, b1) = self.split_f64x8(b);
self.combine_f64x4(self.unzip_high_f64x4(a0, a1), self.unzip_high_f64x4(b0, b1))
}
#[inline(always)]
fn max_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
let (a0, a1) = self.split_f64x8(a);
let (b0, b1) = self.split_f64x8(b);
self.combine_f64x4(self.max_f64x4(a0, b0), self.max_f64x4(a1, b1))
}
#[inline(always)]
fn min_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
let (a0, a1) = self.split_f64x8(a);
let (b0, b1) = self.split_f64x8(b);
self.combine_f64x4(self.min_f64x4(a0, b0), self.min_f64x4(a1, b1))
}
#[inline(always)]
fn max_precise_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
let (a0, a1) = self.split_f64x8(a);
let (b0, b1) = self.split_f64x8(b);
self.combine_f64x4(
self.max_precise_f64x4(a0, b0),
self.max_precise_f64x4(a1, b1),
)
}
#[inline(always)]
fn min_precise_f64x8(self, a: f64x8<Self>, b: f64x8<Self>) -> f64x8<Self> {
let (a0, a1) = self.split_f64x8(a);
let (b0, b1) = self.split_f64x8(b);
self.combine_f64x4(
self.min_precise_f64x4(a0, b0),
self.min_precise_f64x4(a1, b1),
)
}
#[inline(always)]
fn mul_add_f64x8(self, a: f64x8<Self>, b: f64x8<Self>, c: f64x8<Self>) -> f64x8<Self> {
let (a0, a1) = self.split_f64x8(a);
let (b0, b1) = self.split_f64x8(b);
let (c0, c1) = self.split_f64x8(c);
self.combine_f64x4(
self.mul_add_f64x4(a0, b0, c0),
self.mul_add_f64x4(a1, b1, c1),
)
}
#[inline(always)]
fn mul_sub_f64x8(self, a: f64x8<Self>, b: f64x8<Self>, c: f64x8<Self>) -> f64x8<Self> {
let (a0, a1) = self.split_f64x8(a);
let (b0, b1) = self.split_f64x8(b);
let (c0, c1) = self.split_f64x8(c);
self.combine_f64x4(
self.mul_sub_f64x4(a0, b0, c0),
self.mul_sub_f64x4(a1, b1, c1),
)
}
#[inline(always)]
fn floor_f64x8(self, a: f64x8<Self>) -> f64x8<Self> {
let (a0, a1) = self.split_f64x8(a);
self.combine_f64x4(self.floor_f64x4(a0), self.floor_f64x4(a1))
}
#[inline(always)]
fn ceil_f64x8(self, a: f64x8<Self>) -> f64x8<Self> {
let (a0, a1) = self.split_f64x8(a);
self.combine_f64x4(self.ceil_f64x4(a0), self.ceil_f64x4(a1))
}
#[inline(always)]
fn round_ties_even_f64x8(self, a: f64x8<Self>) -> f64x8<Self> {
let (a0, a1) = self.split_f64x8(a);
self.combine_f64x4(
self.round_ties_even_f64x4(a0),
self.round_ties_even_f64x4(a1),
)
}
#[inline(always)]
fn fract_f64x8(self, a: f64x8<Self>) -> f64x8<Self> {
let (a0, a1) = self.split_f64x8(a);
self.combine_f64x4(self.fract_f64x4(a0), self.fract_f64x4(a1))
}
#[inline(always)]
fn trunc_f64x8(self, a: f64x8<Self>) -> f64x8<Self> {
let (a0, a1) = self.split_f64x8(a);
self.combine_f64x4(self.trunc_f64x4(a0), self.trunc_f64x4(a1))
}
#[inline(always)]
fn select_f64x8(self, a: mask64x8<Self>, b: f64x8<Self>, c: f64x8<Self>) -> f64x8<Self> {
let (a0, a1) = self.split_mask64x8(a);
let (b0, b1) = self.split_f64x8(b);
let (c0, c1) = self.split_f64x8(c);
self.combine_f64x4(self.select_f64x4(a0, b0, c0), self.select_f64x4(a1, b1, c1))
}
#[inline(always)]
fn split_f64x8(self, a: f64x8<Self>) -> (f64x4<Self>, f64x4<Self>) {
(
f64x4 {
val: crate::support::Aligned256(float64x2x2_t(a.val.0.0, a.val.0.1)),
simd: self,
},
f64x4 {
val: crate::support::Aligned256(float64x2x2_t(a.val.0.2, a.val.0.3)),
simd: self,
},
)
}
#[inline(always)]
fn reinterpret_f32_f64x8(self, a: f64x8<Self>) -> f32x16<Self> {
let (a0, a1) = self.split_f64x8(a);
self.combine_f32x8(
self.reinterpret_f32_f64x4(a0),
self.reinterpret_f32_f64x4(a1),
)
}
#[inline(always)]
fn splat_mask64x8(self, val: i64) -> mask64x8<Self> {
let half = self.splat_mask64x4(val);
self.combine_mask64x4(half, half)
}
#[inline(always)]
fn load_array_mask64x8(self, val: [i64; 8usize]) -> mask64x8<Self> {
mask64x8 {
val: unsafe { core::mem::transmute_copy(&val) },
simd: self,
}
}
#[inline(always)]
fn load_array_ref_mask64x8(self, val: &[i64; 8usize]) -> mask64x8<Self> {
mask64x8 {
val: unsafe { core::mem::transmute_copy(val) },
simd: self,
}
}
#[inline(always)]
fn as_array_mask64x8(self, a: mask64x8<Self>) -> [i64; 8usize] {
unsafe { core::mem::transmute::<int64x2x4_t, [i64; 8usize]>(a.val.0) }
}
#[inline(always)]
fn as_array_ref_mask64x8(self, a: &mask64x8<Self>) -> &[i64; 8usize] {
unsafe { core::mem::transmute::<&int64x2x4_t, &[i64; 8usize]>(&a.val.0) }
}
#[inline(always)]
fn as_array_mut_mask64x8(self, a: &mut mask64x8<Self>) -> &mut [i64; 8usize] {
unsafe { core::mem::transmute::<&mut int64x2x4_t, &mut [i64; 8usize]>(&mut a.val.0) }
}
#[inline(always)]
fn store_array_mask64x8(self, a: mask64x8<Self>, dest: &mut [i64; 8usize]) -> () {
unsafe {
core::ptr::copy_nonoverlapping(
(&raw const a.val.0) as *const i64,
dest.as_mut_ptr(),
8usize,
);
}
}
#[inline(always)]
fn cvt_from_bytes_mask64x8(self, a: u8x64<Self>) -> mask64x8<Self> {
unsafe {
mask64x8 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn cvt_to_bytes_mask64x8(self, a: mask64x8<Self>) -> u8x64<Self> {
unsafe {
u8x64 {
val: core::mem::transmute(a.val),
simd: self,
}
}
}
#[inline(always)]
fn slide_mask64x8<const SHIFT: usize>(
self,
a: mask64x8<Self>,
b: mask64x8<Self>,
) -> mask64x8<Self> {
if SHIFT >= 8usize {
return b;
}
let result = unsafe {
let a_bytes = self.cvt_to_bytes_mask64x8(a).val.0;
let b_bytes = self.cvt_to_bytes_mask64x8(b).val.0;
let a_blocks = [a_bytes.0, a_bytes.1, a_bytes.2, a_bytes.3];
let b_blocks = [b_bytes.0, b_bytes.1, b_bytes.2, b_bytes.3];
let shift_bytes = SHIFT * 8usize;
uint8x16x4_t(
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
0,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
1,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
2,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
{
let [lo, hi] = crate::support::cross_block_slide_blocks_at(
&a_blocks,
&b_blocks,
3,
shift_bytes,
);
dyn_vext_128(lo, hi, shift_bytes % 16)
},
)
};
self.cvt_from_bytes_mask64x8(u8x64 {
val: crate::support::Aligned512(result),
simd: self,
})
}
#[inline(always)]
fn slide_within_blocks_mask64x8<const SHIFT: usize>(
self,
a: mask64x8<Self>,
b: mask64x8<Self>,
) -> mask64x8<Self> {
let (a0, a1) = self.split_mask64x8(a);
let (b0, b1) = self.split_mask64x8(b);
self.combine_mask64x4(
self.slide_within_blocks_mask64x4::<SHIFT>(a0, b0),
self.slide_within_blocks_mask64x4::<SHIFT>(a1, b1),
)
}
#[inline(always)]
fn and_mask64x8(self, a: mask64x8<Self>, b: mask64x8<Self>) -> mask64x8<Self> {
let (a0, a1) = self.split_mask64x8(a);
let (b0, b1) = self.split_mask64x8(b);
self.combine_mask64x4(self.and_mask64x4(a0, b0), self.and_mask64x4(a1, b1))
}
#[inline(always)]
fn or_mask64x8(self, a: mask64x8<Self>, b: mask64x8<Self>) -> mask64x8<Self> {
let (a0, a1) = self.split_mask64x8(a);
let (b0, b1) = self.split_mask64x8(b);
self.combine_mask64x4(self.or_mask64x4(a0, b0), self.or_mask64x4(a1, b1))
}
#[inline(always)]
fn xor_mask64x8(self, a: mask64x8<Self>, b: mask64x8<Self>) -> mask64x8<Self> {
let (a0, a1) = self.split_mask64x8(a);
let (b0, b1) = self.split_mask64x8(b);
self.combine_mask64x4(self.xor_mask64x4(a0, b0), self.xor_mask64x4(a1, b1))
}
#[inline(always)]
fn not_mask64x8(self, a: mask64x8<Self>) -> mask64x8<Self> {
let (a0, a1) = self.split_mask64x8(a);
self.combine_mask64x4(self.not_mask64x4(a0), self.not_mask64x4(a1))
}
#[inline(always)]
fn select_mask64x8(
self,
a: mask64x8<Self>,
b: mask64x8<Self>,
c: mask64x8<Self>,
) -> mask64x8<Self> {
let (a0, a1) = self.split_mask64x8(a);
let (b0, b1) = self.split_mask64x8(b);
let (c0, c1) = self.split_mask64x8(c);
self.combine_mask64x4(
self.select_mask64x4(a0, b0, c0),
self.select_mask64x4(a1, b1, c1),
)
}
#[inline(always)]
fn simd_eq_mask64x8(self, a: mask64x8<Self>, b: mask64x8<Self>) -> mask64x8<Self> {
let (a0, a1) = self.split_mask64x8(a);
let (b0, b1) = self.split_mask64x8(b);
self.combine_mask64x4(self.simd_eq_mask64x4(a0, b0), self.simd_eq_mask64x4(a1, b1))
}
#[inline(always)]
fn any_true_mask64x8(self, a: mask64x8<Self>) -> bool {
let (a0, a1) = self.split_mask64x8(a);
self.any_true_mask64x4(a0) || self.any_true_mask64x4(a1)
}
#[inline(always)]
fn all_true_mask64x8(self, a: mask64x8<Self>) -> bool {
let (a0, a1) = self.split_mask64x8(a);
self.all_true_mask64x4(a0) && self.all_true_mask64x4(a1)
}
#[inline(always)]
fn any_false_mask64x8(self, a: mask64x8<Self>) -> bool {
let (a0, a1) = self.split_mask64x8(a);
self.any_false_mask64x4(a0) || self.any_false_mask64x4(a1)
}
#[inline(always)]
fn all_false_mask64x8(self, a: mask64x8<Self>) -> bool {
let (a0, a1) = self.split_mask64x8(a);
self.all_false_mask64x4(a0) && self.all_false_mask64x4(a1)
}
#[inline(always)]
fn split_mask64x8(self, a: mask64x8<Self>) -> (mask64x4<Self>, mask64x4<Self>) {
(
mask64x4 {
val: crate::support::Aligned256(int64x2x2_t(a.val.0.0, a.val.0.1)),
simd: self,
},
mask64x4 {
val: crate::support::Aligned256(int64x2x2_t(a.val.0.2, a.val.0.3)),
simd: self,
},
)
}
}
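// Conversions between the portable vector types and the raw aarch64 intrinsic types.
// These rely on the wrapper types (Aligned128/256/512) having the same size as the
// corresponding intrinsic types, which is what makes the by-value `transmute_copy`
// calls below sound.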
impl<S: Simd> SimdFrom<float32x4_t, S> for f32x4<S> {
#[inline(always)]
fn simd_from(simd: S, arch: float32x4_t) -> Self {
Self {
val: unsafe { core::mem::transmute_copy(&arch) },
simd,
}
}
}
impl<S: Simd> From<f32x4<S>> for float32x4_t {
#[inline(always)]
fn from(value: f32x4<S>) -> Self {
unsafe { core::mem::transmute_copy(&value.val) }
}
}
impl<S: Simd> SimdFrom<int8x16_t, S> for i8x16<S> {
#[inline(always)]
fn simd_from(simd: S, arch: int8x16_t) -> Self {
Self {
val: unsafe { core::mem::transmute_copy(&arch) },
simd,
}
}
}
impl<S: Simd> From<i8x16<S>> for int8x16_t {
#[inline(always)]
fn from(value: i8x16<S>) -> Self {
unsafe { core::mem::transmute_copy(&value.val) }
}
}
impl<S: Simd> SimdFrom<uint8x16_t, S> for u8x16<S> {
#[inline(always)]
fn simd_from(simd: S, arch: uint8x16_t) -> Self {
Self {
val: unsafe { core::mem::transmute_copy(&arch) },
simd,
}
}
}
impl<S: Simd> From<u8x16<S>> for uint8x16_t {
#[inline(always)]
fn from(value: u8x16<S>) -> Self {
unsafe { core::mem::transmute_copy(&value.val) }
}
}
impl<S: Simd> SimdFrom<int8x16_t, S> for mask8x16<S> {
#[inline(always)]
fn simd_from(simd: S, arch: int8x16_t) -> Self {
Self {
val: unsafe { core::mem::transmute_copy(&arch) },
simd,
}
}
}
impl<S: Simd> From<mask8x16<S>> for int8x16_t {
#[inline(always)]
fn from(value: mask8x16<S>) -> Self {
unsafe { core::mem::transmute_copy(&value.val) }
}
}
impl<S: Simd> SimdFrom<int16x8_t, S> for i16x8<S> {
#[inline(always)]
fn simd_from(simd: S, arch: int16x8_t) -> Self {
Self {
val: unsafe { core::mem::transmute_copy(&arch) },
simd,
}
}
}
impl<S: Simd> From<i16x8<S>> for int16x8_t {
#[inline(always)]
fn from(value: i16x8<S>) -> Self {
unsafe { core::mem::transmute_copy(&value.val) }
}
}
impl<S: Simd> SimdFrom<uint16x8_t, S> for u16x8<S> {
#[inline(always)]
fn simd_from(simd: S, arch: uint16x8_t) -> Self {
Self {
val: unsafe { core::mem::transmute_copy(&arch) },
simd,
}
}
}
impl<S: Simd> From<u16x8<S>> for uint16x8_t {
#[inline(always)]
fn from(value: u16x8<S>) -> Self {
unsafe { core::mem::transmute_copy(&value.val) }
}
}
impl<S: Simd> SimdFrom<int16x8_t, S> for mask16x8<S> {
#[inline(always)]
fn simd_from(simd: S, arch: int16x8_t) -> Self {
Self {
val: unsafe { core::mem::transmute_copy(&arch) },
simd,
}
}
}
impl<S: Simd> From<mask16x8<S>> for int16x8_t {
#[inline(always)]
fn from(value: mask16x8<S>) -> Self {
unsafe { core::mem::transmute_copy(&value.val) }
}
}
impl<S: Simd> SimdFrom<int32x4_t, S> for i32x4<S> {
#[inline(always)]
fn simd_from(simd: S, arch: int32x4_t) -> Self {
Self {
val: unsafe { core::mem::transmute_copy(&arch) },
simd,
}
}
}
impl<S: Simd> From<i32x4<S>> for int32x4_t {
#[inline(always)]
fn from(value: i32x4<S>) -> Self {
unsafe { core::mem::transmute_copy(&value.val) }
}
}
impl<S: Simd> SimdFrom<uint32x4_t, S> for u32x4<S> {
#[inline(always)]
fn simd_from(simd: S, arch: uint32x4_t) -> Self {
Self {
val: unsafe { core::mem::transmute_copy(&arch) },
simd,
}
}
}
impl<S: Simd> From<u32x4<S>> for uint32x4_t {
#[inline(always)]
fn from(value: u32x4<S>) -> Self {
unsafe { core::mem::transmute_copy(&value.val) }
}
}
impl<S: Simd> SimdFrom<int32x4_t, S> for mask32x4<S> {
#[inline(always)]
fn simd_from(simd: S, arch: int32x4_t) -> Self {
Self {
val: unsafe { core::mem::transmute_copy(&arch) },
simd,
}
}
}
impl<S: Simd> From<mask32x4<S>> for int32x4_t {
#[inline(always)]
fn from(value: mask32x4<S>) -> Self {
unsafe { core::mem::transmute_copy(&value.val) }
}
}
impl<S: Simd> SimdFrom<float64x2_t, S> for f64x2<S> {
#[inline(always)]
fn simd_from(simd: S, arch: float64x2_t) -> Self {
Self {
val: unsafe { core::mem::transmute_copy(&arch) },
simd,
}
}
}
impl<S: Simd> From<f64x2<S>> for float64x2_t {
#[inline(always)]
fn from(value: f64x2<S>) -> Self {
unsafe { core::mem::transmute_copy(&value.val) }
}
}
impl<S: Simd> SimdFrom<int64x2_t, S> for mask64x2<S> {
#[inline(always)]
fn simd_from(simd: S, arch: int64x2_t) -> Self {
Self {
val: unsafe { core::mem::transmute_copy(&arch) },
simd,
}
}
}
impl<S: Simd> From<mask64x2<S>> for int64x2_t {
#[inline(always)]
fn from(value: mask64x2<S>) -> Self {
unsafe { core::mem::transmute_copy(&value.val) }
}
}
impl<S: Simd> SimdFrom<float32x4x2_t, S> for f32x8<S> {
#[inline(always)]
fn simd_from(simd: S, arch: float32x4x2_t) -> Self {
Self {
val: unsafe { core::mem::transmute_copy(&arch) },
simd,
}
}
}
impl<S: Simd> From<f32x8<S>> for float32x4x2_t {
#[inline(always)]
fn from(value: f32x8<S>) -> Self {
unsafe { core::mem::transmute_copy(&value.val) }
}
}
impl<S: Simd> SimdFrom<int8x16x2_t, S> for i8x32<S> {
#[inline(always)]
fn simd_from(simd: S, arch: int8x16x2_t) -> Self {
Self {
val: unsafe { core::mem::transmute_copy(&arch) },
simd,
}
}
}
impl<S: Simd> From<i8x32<S>> for int8x16x2_t {
#[inline(always)]
fn from(value: i8x32<S>) -> Self {
unsafe { core::mem::transmute_copy(&value.val) }
}
}
impl<S: Simd> SimdFrom<uint8x16x2_t, S> for u8x32<S> {
#[inline(always)]
fn simd_from(simd: S, arch: uint8x16x2_t) -> Self {
Self {
val: unsafe { core::mem::transmute_copy(&arch) },
simd,
}
}
}
impl<S: Simd> From<u8x32<S>> for uint8x16x2_t {
#[inline(always)]
fn from(value: u8x32<S>) -> Self {
unsafe { core::mem::transmute_copy(&value.val) }
}
}
impl<S: Simd> SimdFrom<int8x16x2_t, S> for mask8x32<S> {
#[inline(always)]
fn simd_from(simd: S, arch: int8x16x2_t) -> Self {
Self {
val: unsafe { core::mem::transmute_copy(&arch) },
simd,
}
}
}
impl<S: Simd> From<mask8x32<S>> for int8x16x2_t {
#[inline(always)]
fn from(value: mask8x32<S>) -> Self {
unsafe { core::mem::transmute_copy(&value.val) }
}
}
impl<S: Simd> SimdFrom<int16x8x2_t, S> for i16x16<S> {
#[inline(always)]
fn simd_from(simd: S, arch: int16x8x2_t) -> Self {
Self {
val: unsafe { core::mem::transmute_copy(&arch) },
simd,
}
}
}
impl<S: Simd> From<i16x16<S>> for int16x8x2_t {
#[inline(always)]
fn from(value: i16x16<S>) -> Self {
unsafe { core::mem::transmute_copy(&value.val) }
}
}
impl<S: Simd> SimdFrom<uint16x8x2_t, S> for u16x16<S> {
#[inline(always)]
fn simd_from(simd: S, arch: uint16x8x2_t) -> Self {
Self {
val: unsafe { core::mem::transmute_copy(&arch) },
simd,
}
}
}
impl<S: Simd> From<u16x16<S>> for uint16x8x2_t {
#[inline(always)]
fn from(value: u16x16<S>) -> Self {
unsafe { core::mem::transmute_copy(&value.val) }
}
}
impl<S: Simd> SimdFrom<int16x8x2_t, S> for mask16x16<S> {
#[inline(always)]
fn simd_from(simd: S, arch: int16x8x2_t) -> Self {
Self {
val: unsafe { core::mem::transmute_copy(&arch) },
simd,
}
}
}
impl<S: Simd> From<mask16x16<S>> for int16x8x2_t {
#[inline(always)]
fn from(value: mask16x16<S>) -> Self {
unsafe { core::mem::transmute_copy(&value.val) }
}
}
impl<S: Simd> SimdFrom<int32x4x2_t, S> for i32x8<S> {
#[inline(always)]
fn simd_from(simd: S, arch: int32x4x2_t) -> Self {
Self {
val: unsafe { core::mem::transmute_copy(&arch) },
simd,
}
}
}
impl<S: Simd> From<i32x8<S>> for int32x4x2_t {
#[inline(always)]
fn from(value: i32x8<S>) -> Self {
unsafe { core::mem::transmute_copy(&value.val) }
}
}
impl<S: Simd> SimdFrom<uint32x4x2_t, S> for u32x8<S> {
#[inline(always)]
fn simd_from(simd: S, arch: uint32x4x2_t) -> Self {
Self {
val: unsafe { core::mem::transmute_copy(&arch) },
simd,
}
}
}
impl<S: Simd> From<u32x8<S>> for uint32x4x2_t {
#[inline(always)]
fn from(value: u32x8<S>) -> Self {
unsafe { core::mem::transmute_copy(&value.val) }
}
}
impl<S: Simd> SimdFrom<int32x4x2_t, S> for mask32x8<S> {
#[inline(always)]
fn simd_from(simd: S, arch: int32x4x2_t) -> Self {
Self {
val: unsafe { core::mem::transmute_copy(&arch) },
simd,
}
}
}
impl<S: Simd> From<mask32x8<S>> for int32x4x2_t {
#[inline(always)]
fn from(value: mask32x8<S>) -> Self {
unsafe { core::mem::transmute_copy(&value.val) }
}
}
impl<S: Simd> SimdFrom<float64x2x2_t, S> for f64x4<S> {
#[inline(always)]
fn simd_from(simd: S, arch: float64x2x2_t) -> Self {
Self {
val: unsafe { core::mem::transmute_copy(&arch) },
simd,
}
}
}
impl<S: Simd> From<f64x4<S>> for float64x2x2_t {
#[inline(always)]
fn from(value: f64x4<S>) -> Self {
unsafe { core::mem::transmute_copy(&value.val) }
}
}
impl<S: Simd> SimdFrom<int64x2x2_t, S> for mask64x4<S> {
#[inline(always)]
fn simd_from(simd: S, arch: int64x2x2_t) -> Self {
Self {
val: unsafe { core::mem::transmute_copy(&arch) },
simd,
}
}
}
impl<S: Simd> From<mask64x4<S>> for int64x2x2_t {
#[inline(always)]
fn from(value: mask64x4<S>) -> Self {
unsafe { core::mem::transmute_copy(&value.val) }
}
}
impl<S: Simd> SimdFrom<float32x4x4_t, S> for f32x16<S> {
#[inline(always)]
fn simd_from(simd: S, arch: float32x4x4_t) -> Self {
Self {
val: unsafe { core::mem::transmute_copy(&arch) },
simd,
}
}
}
impl<S: Simd> From<f32x16<S>> for float32x4x4_t {
#[inline(always)]
fn from(value: f32x16<S>) -> Self {
unsafe { core::mem::transmute_copy(&value.val) }
}
}
impl<S: Simd> SimdFrom<int8x16x4_t, S> for i8x64<S> {
#[inline(always)]
fn simd_from(simd: S, arch: int8x16x4_t) -> Self {
Self {
val: unsafe { core::mem::transmute_copy(&arch) },
simd,
}
}
}
impl<S: Simd> From<i8x64<S>> for int8x16x4_t {
#[inline(always)]
fn from(value: i8x64<S>) -> Self {
unsafe { core::mem::transmute_copy(&value.val) }
}
}
impl<S: Simd> SimdFrom<uint8x16x4_t, S> for u8x64<S> {
#[inline(always)]
fn simd_from(simd: S, arch: uint8x16x4_t) -> Self {
Self {
val: unsafe { core::mem::transmute_copy(&arch) },
simd,
}
}
}
impl<S: Simd> From<u8x64<S>> for uint8x16x4_t {
#[inline(always)]
fn from(value: u8x64<S>) -> Self {
unsafe { core::mem::transmute_copy(&value.val) }
}
}
impl<S: Simd> SimdFrom<int8x16x4_t, S> for mask8x64<S> {
#[inline(always)]
fn simd_from(simd: S, arch: int8x16x4_t) -> Self {
Self {
val: unsafe { core::mem::transmute_copy(&arch) },
simd,
}
}
}
impl<S: Simd> From<mask8x64<S>> for int8x16x4_t {
#[inline(always)]
fn from(value: mask8x64<S>) -> Self {
unsafe { core::mem::transmute_copy(&value.val) }
}
}
impl<S: Simd> SimdFrom<int16x8x4_t, S> for i16x32<S> {
#[inline(always)]
fn simd_from(simd: S, arch: int16x8x4_t) -> Self {
Self {
val: unsafe { core::mem::transmute_copy(&arch) },
simd,
}
}
}
impl<S: Simd> From<i16x32<S>> for int16x8x4_t {
#[inline(always)]
fn from(value: i16x32<S>) -> Self {
unsafe { core::mem::transmute_copy(&value.val) }
}
}
impl<S: Simd> SimdFrom<uint16x8x4_t, S> for u16x32<S> {
#[inline(always)]
fn simd_from(simd: S, arch: uint16x8x4_t) -> Self {
Self {
val: unsafe { core::mem::transmute_copy(&arch) },
simd,
}
}
}
impl<S: Simd> From<u16x32<S>> for uint16x8x4_t {
#[inline(always)]
fn from(value: u16x32<S>) -> Self {
unsafe { core::mem::transmute_copy(&value.val) }
}
}
impl<S: Simd> SimdFrom<int16x8x4_t, S> for mask16x32<S> {
#[inline(always)]
fn simd_from(simd: S, arch: int16x8x4_t) -> Self {
Self {
val: unsafe { core::mem::transmute_copy(&arch) },
simd,
}
}
}
impl<S: Simd> From<mask16x32<S>> for int16x8x4_t {
#[inline(always)]
fn from(value: mask16x32<S>) -> Self {
unsafe { core::mem::transmute_copy(&value.val) }
}
}
impl<S: Simd> SimdFrom<int32x4x4_t, S> for i32x16<S> {
#[inline(always)]
fn simd_from(simd: S, arch: int32x4x4_t) -> Self {
Self {
val: unsafe { core::mem::transmute_copy(&arch) },
simd,
}
}
}
impl<S: Simd> From<i32x16<S>> for int32x4x4_t {
#[inline(always)]
fn from(value: i32x16<S>) -> Self {
unsafe { core::mem::transmute_copy(&value.val) }
}
}
impl<S: Simd> SimdFrom<uint32x4x4_t, S> for u32x16<S> {
#[inline(always)]
fn simd_from(simd: S, arch: uint32x4x4_t) -> Self {
Self {
val: unsafe { core::mem::transmute_copy(&arch) },
simd,
}
}
}
impl<S: Simd> From<u32x16<S>> for uint32x4x4_t {
#[inline(always)]
fn from(value: u32x16<S>) -> Self {
unsafe { core::mem::transmute_copy(&value.val) }
}
}
impl<S: Simd> SimdFrom<int32x4x4_t, S> for mask32x16<S> {
#[inline(always)]
fn simd_from(simd: S, arch: int32x4x4_t) -> Self {
Self {
val: unsafe { core::mem::transmute_copy(&arch) },
simd,
}
}
}
impl<S: Simd> From<mask32x16<S>> for int32x4x4_t {
#[inline(always)]
fn from(value: mask32x16<S>) -> Self {
unsafe { core::mem::transmute_copy(&value.val) }
}
}
impl<S: Simd> SimdFrom<float64x2x4_t, S> for f64x8<S> {
#[inline(always)]
fn simd_from(simd: S, arch: float64x2x4_t) -> Self {
Self {
val: unsafe { core::mem::transmute_copy(&arch) },
simd,
}
}
}
impl<S: Simd> From<f64x8<S>> for float64x2x4_t {
#[inline(always)]
fn from(value: f64x8<S>) -> Self {
unsafe { core::mem::transmute_copy(&value.val) }
}
}
impl<S: Simd> SimdFrom<int64x2x4_t, S> for mask64x8<S> {
#[inline(always)]
fn simd_from(simd: S, arch: int64x2x4_t) -> Self {
Self {
val: unsafe { core::mem::transmute_copy(&arch) },
simd,
}
}
}
impl<S: Simd> From<mask64x8<S>> for int64x2x4_t {
#[inline(always)]
fn from(value: mask64x8<S>) -> Self {
unsafe { core::mem::transmute_copy(&value.val) }
}
}
#[doc = r" This is a version of the `vext` intrinsic that takes a non-const shift argument. The shift is still"]
#[doc = r" expected to be constant in practice, so the match statement will be optimized out. This exists because"]
#[doc = r" Rust doesn't currently let you do math on const generics."]
#[inline(always)]
unsafe fn dyn_vext_128(a: uint8x16_t, b: uint8x16_t, shift: usize) -> uint8x16_t {
unsafe {
match shift {
0usize => vextq_u8::<0i32>(a, b),
1usize => vextq_u8::<1i32>(a, b),
2usize => vextq_u8::<2i32>(a, b),
3usize => vextq_u8::<3i32>(a, b),
4usize => vextq_u8::<4i32>(a, b),
5usize => vextq_u8::<5i32>(a, b),
6usize => vextq_u8::<6i32>(a, b),
7usize => vextq_u8::<7i32>(a, b),
8usize => vextq_u8::<8i32>(a, b),
9usize => vextq_u8::<9i32>(a, b),
10usize => vextq_u8::<10i32>(a, b),
11usize => vextq_u8::<11i32>(a, b),
12usize => vextq_u8::<12i32>(a, b),
13usize => vextq_u8::<13i32>(a, b),
14usize => vextq_u8::<14i32>(a, b),
15usize => vextq_u8::<15i32>(a, b),
_ => unreachable!(),
}
}
}
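// For example, `dyn_vext_128(lo, hi, 4)` compiles to `vextq_u8::<4>(lo, hi)`, which
// yields bytes 4..20 of the concatenation `lo ++ hi`: the first 4 bytes of `lo` are
// dropped and the first 4 bytes of `hi` are pulled in at the top.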