use std::arch::aarch64::*;
use half::f16;
use crate::{
LoHi, SplitJoin,
doubled::{self, Doubled},
};
use super::{
f16x8, f32x4, i8x16, i16x8, i32x4,
masks::{mask8x16, mask16x8, mask32x4, mask64x2},
u8x16, u32x4, u64x2,
};
// Instantiate `doubled::double_mask!` for every mask type that backs a doubled vector below.
// The leading literal is presumably the lane count of the resulting doubled mask (each value is
// twice the lane count in the wrapped type's name) — confirm against the macro definition.
//
// First level: doubling the mask types imported from `super::masks`.
doubled::double_mask!(32, mask8x16);
doubled::double_mask!(16, mask16x8);
doubled::double_mask!(8, mask32x4);
doubled::double_mask!(4, mask64x2);
// Second level: doubling the already-doubled masks.
// NOTE(review): there is no `Doubled<mask64x2>` entry, which lines up with the visible part of
// this file declaring no 8-lane 64-bit vector alias — confirm this is intentional.
doubled::double_mask!(64, Doubled<mask8x16>);
doubled::double_mask!(32, Doubled<mask16x8>);
doubled::double_mask!(16, Doubled<mask32x4>);
// Declares one doubled vector type in a single step.
//
// Arguments, in order: the alias name to introduce, the element type, the lane-count literal,
// and the half-width vector type that gets wrapped in `Doubled`. The element type, lane count
// and half type are forwarded unchanged to `doubled::double_vector!`; the alias itself is a
// plain `pub type`, and its lowercase SIMD-style name is why `non_camel_case_types` is allowed.
macro_rules! double_alias {
    ($alias:ident, $elem:ty, $count:literal, $half:ty) => {
        doubled::double_vector!($elem, $count, $half);

        #[allow(non_camel_case_types)]
        pub type $alias = Doubled<$half>;
    };
}
// First-level aliases: each one is a `Doubled` pair of a base vector type imported from `super`.
// Argument order is (alias, element type, lane count, wrapped half type).
double_alias!(f32x8, f32, 8, f32x4);
double_alias!(u8x32, u8, 32, u8x16);
double_alias!(u32x8, u32, 8, u32x4);
double_alias!(u64x4, u64, 4, u64x2);
double_alias!(i8x32, i8, 32, i8x16);
double_alias!(i16x16, i16, 16, i16x8);
double_alias!(i32x8, i32, 8, i32x4);
// Instantiate `doubled::double_scalar_shift!` for the doubled integer vectors (presumably
// shift-by-scalar operators, going by the macro name — confirm against its definition).
// The float aliases are not listed here.
doubled::double_scalar_shift!(Doubled<u8x16>);
doubled::double_scalar_shift!(Doubled<u32x4>);
doubled::double_scalar_shift!(Doubled<u64x2>);
doubled::double_scalar_shift!(Doubled<i8x16>);
doubled::double_scalar_shift!(Doubled<i16x8>);
doubled::double_scalar_shift!(Doubled<i32x4>);
// Wider aliases. All but `f16x16` wrap one of the first-level doubled aliases, so they are
// `Doubled<Doubled<_>>` underneath.
// NOTE: `f16x16` is the exception — it wraps the base `f16x8` type directly.
double_alias!(f32x16, f32, 16, f32x8);
double_alias!(f16x16, f16, 16, f16x8);
double_alias!(u8x64, u8, 64, u8x32);
double_alias!(u32x16, u32, 16, u32x8);
double_alias!(i8x64, i8, 64, i8x32);
double_alias!(i16x32, i16, 32, i16x16);
double_alias!(i32x16, i32, 16, i32x8);
// `doubled::double_scalar_shift!` instantiations for the twice-doubled integer vectors.
// NOTE(review): `Doubled<Doubled<u64x2>>` is instantiated here although the visible part of this
// file declares no alias (and no `double_vector!` call) for that type — confirm it is intended.
doubled::double_scalar_shift!(Doubled<Doubled<u8x16>>);
doubled::double_scalar_shift!(Doubled<Doubled<u32x4>>);
doubled::double_scalar_shift!(Doubled<Doubled<u64x2>>);
doubled::double_scalar_shift!(Doubled<Doubled<i8x16>>);
doubled::double_scalar_shift!(Doubled<Doubled<i16x8>>);
doubled::double_scalar_shift!(Doubled<Doubled<i32x4>>);
// Instantiate `super::macros::aarch64_zipunzip!` for each base vector type that needs it.
// After the vector type, the arguments are the matching intrinsics in the order
// zip1, zip2, uzp1, uzp2, each chosen for the type's element width and signedness.
super::macros::aarch64_zipunzip!(i8x16, vzip1q_s8, vzip2q_s8, vuzp1q_s8, vuzp2q_s8);
super::macros::aarch64_zipunzip!(i16x8, vzip1q_s16, vzip2q_s16, vuzp1q_s16, vuzp2q_s16);
super::macros::aarch64_zipunzip!(i32x4, vzip1q_s32, vzip2q_s32, vuzp1q_s32, vuzp2q_s32);
super::macros::aarch64_zipunzip!(u8x16, vzip1q_u8, vzip2q_u8, vuzp1q_u8, vuzp2q_u8);
super::macros::aarch64_zipunzip!(u32x4, vzip1q_u32, vzip2q_u32, vuzp1q_u32, vuzp2q_u32);
// `f16x8` is wired to the `u16` intrinsics rather than float ones — presumably because the type
// is stored as 16-bit unsigned lanes and zip/unzip only moves lanes; confirm against `f16x8`.
super::macros::aarch64_zipunzip!(f16x8, vzip1q_u16, vzip2q_u16, vuzp1q_u16, vuzp2q_u16);
impl From<f16x8> for f32x8 {
#[inline(always)]
fn from(value: f16x8) -> Self {
let LoHi { lo, hi } = value.split();
Self::new(lo.into(), hi.into())
}
}
/// Converts an `f16x16` into an `f32x16` one component at a time.
impl From<f16x16> for f32x16 {
    #[inline(always)]
    fn from(value: f16x16) -> Self {
        // The target type of each `into()` is fixed by the parameters of `Self::new`.
        let lo = value.0.into();
        let hi = value.1.into();
        Self::new(lo, hi)
    }
}
impl From<u8x16> for i16x16 {
#[inline(always)]
fn from(value: u8x16) -> Self {
let LoHi { lo, hi } = value.split();
Self::new(lo.into(), hi.into())
}
}
/// Converts a `u8x32` into an `i16x32` by converting its two components independently.
impl From<u8x32> for i16x32 {
    #[inline(always)]
    fn from(value: u8x32) -> Self {
        let (first, second) = (value.0, value.1);
        Self::new(first.into(), second.into())
    }
}
impl From<i8x16> for i16x16 {
#[inline(always)]
fn from(value: i8x16) -> Self {
let LoHi { lo, hi } = value.split();
Self::new(lo.into(), hi.into())
}
}
impl From<i8x32> for i16x32 {
#[inline(always)]
fn from(value: i8x32) -> Self {
Self::new(value.0.into(), value.1.into())
}
}
impl crate::SIMDCast<f32> for f16x16 {
type Cast = f32x16;
#[inline(always)]
fn simd_cast(self) -> f32x16 {
self.into()
}
}
/// Casts an `f32x8` to `f16x8`: each component is cast separately and the two results are
/// joined back into a single `f16x8`.
impl crate::SIMDCast<f16> for f32x8 {
    type Cast = f16x8;

    #[inline(always)]
    fn simd_cast(self) -> f16x8 {
        let lo = self.0.simd_cast();
        let hi = self.1.simd_cast();
        f16x8::join(LoHi::new(lo, hi))
    }
}
/// Casts an `f32x16` to `f16x16` by casting its two components and pairing the results.
impl crate::SIMDCast<f16> for f32x16 {
    type Cast = f16x16;

    #[inline(always)]
    fn simd_cast(self) -> f16x16 {
        let (lo, hi) = (self.0.simd_cast(), self.1.simd_cast());
        f16x16::new(lo, hi)
    }
}
/// Casts an `i32x8` to `f32x8` by casting its two components and pairing the results.
impl crate::SIMDCast<f32> for i32x8 {
    type Cast = f32x8;

    #[inline(always)]
    fn simd_cast(self) -> f32x8 {
        let lo = self.0.simd_cast();
        let hi = self.1.simd_cast();
        f32x8::new(lo, hi)
    }
}
// Tests for the doubled vector aliases and the conversions/casts defined in this file.
//
// Almost everything here is generated by macros from `test_utils`. Each invocation receives a
// hexadecimal literal (presumably an RNG seed — confirm against `test_utils`) and the result of
// `test_neon()`.
#[cfg(test)]
mod tests {
use super::*;
use crate::{arch::aarch64::test_neon, reference::ReferenceScalarOps, test_utils};
// Emits the test set shared by every doubled vector type: load, store and splat tests written
// out here, followed by the add/sub/mul/fma/cmp tests generated by `test_utils::ops`.
//
// Arguments: the vector type under test, its element type, and its lane count.
// The hand-written tests only run their body when `test_neon()` returns `Some(arch)`.
// Because the generated function names are fixed, each invocation must live in its own module.
macro_rules! standard_tests {
($type:ident, $scalar:ty, $lanes:literal) => {
// NOTE(review): the `miri_` prefix presumably marks tests selected for Miri runs — confirm.
#[test]
fn miri_test_load() {
if let Some(arch) = test_neon() {
test_utils::test_load_simd::<$scalar, $lanes, $type>(arch);
}
}
#[test]
fn miri_test_store() {
if let Some(arch) = test_neon() {
test_utils::test_store_simd::<$scalar, $lanes, $type>(arch);
}
}
#[test]
fn test_constructors() {
if let Some(arch) = test_neon() {
test_utils::ops::test_splat::<$scalar, $lanes, $type>(arch);
}
}
// Arithmetic and comparison tests; the literals are the same for every vector type.
test_utils::ops::test_add!($type, 0x1c08175714ae637e, test_neon());
test_utils::ops::test_sub!($type, 0x3746ddcb006b7b4c, test_neon());
test_utils::ops::test_mul!($type, 0xde99e62aaea3f38a, test_neon());
test_utils::ops::test_fma!($type, 0x2e301b7e12090d5c, test_neon());
test_utils::ops::test_cmp!($type, 0x90a59e23ad545de1, test_neon());
};
}
// Float vectors: standard set plus sumtree, split/join, min/max and abs.
mod test_f32x8 {
use super::*;
standard_tests!(f32x8, f32, 8);
test_utils::ops::test_sumtree!(f32x8, 0x90a59e23ad545de1, test_neon());
test_utils::ops::test_splitjoin!(f32x8 => f32x4, 0x2e301b7e12090d5c, test_neon());
test_utils::ops::test_minmax!(f32x8, 0x6d7fc8ed6d852187, test_neon());
test_utils::ops::test_abs!(f32x8, 0xb8f702ba85375041, test_neon());
}
mod test_f32x16 {
use super::*;
standard_tests!(f32x16, f32, 16);
test_utils::ops::test_sumtree!(f32x16, 0x90a59e23ad545de1, test_neon());
test_utils::ops::test_splitjoin!(f32x16 => f32x8, 0x2e301b7e12090d5c, test_neon());
test_utils::ops::test_minmax!(f32x16, 0x6d7fc8ed6d852187, test_neon());
test_utils::ops::test_abs!(f32x16, 0xb8f702ba85375041, test_neon());
}
// Unsigned integer vectors: standard set plus bit ops and split/join. Zip/unzip tests appear
// only for the first-level doubled types.
mod test_u8x32 {
use super::*;
standard_tests!(u8x32, u8, 32);
test_utils::ops::test_bitops!(u8x32, 0xd62d8de09f82ed4e, test_neon());
test_utils::ops::test_splitjoin!(u8x32 => u8x16, 0x2e301b7e12090d5c, test_neon());
test_utils::ops::test_zipunzip!(u8x32 => u8x16, 0xa1b2c3d4e5f67890, test_neon());
}
mod test_u8x64 {
use super::*;
standard_tests!(u8x64, u8, 64);
test_utils::ops::test_bitops!(u8x64, 0xd62d8de09f82ed4e, test_neon());
test_utils::ops::test_splitjoin!(u8x64 => u8x32, 0x2e301b7e12090d5c, test_neon());
}
mod test_u32x8 {
use super::*;
standard_tests!(u32x8, u32, 8);
test_utils::ops::test_bitops!(u32x8, 0xd62d8de09f82ed4e, test_neon());
test_utils::ops::test_sumtree!(u32x8, 0x90a59e23ad545de1, test_neon());
test_utils::ops::test_splitjoin!(u32x8 => u32x4, 0x2e301b7e12090d5c, test_neon());
test_utils::ops::test_zipunzip!(u32x8 => u32x4, 0x4e7c0a3d5b9f2816, test_neon());
}
mod test_u32x16 {
use super::*;
standard_tests!(u32x16, u32, 16);
test_utils::ops::test_bitops!(u32x16, 0xd62d8de09f82ed4e, test_neon());
test_utils::ops::test_sumtree!(u32x16, 0x90a59e23ad545de1, test_neon());
test_utils::ops::test_splitjoin!(u32x16 => u32x8, 0x2e301b7e12090d5c, test_neon());
}
mod test_u64x4 {
use super::*;
standard_tests!(u64x4, u64, 4);
test_utils::ops::test_bitops!(u64x4, 0xc4491a44af4aa58e, test_neon());
test_utils::ops::test_splitjoin!(u64x4 => u64x2, 0x2e301b7e12090d5c, test_neon());
}
// Signed integer vectors: as for the unsigned ones, plus abs.
mod test_i8x32 {
use super::*;
standard_tests!(i8x32, i8, 32);
test_utils::ops::test_bitops!(i8x32, 0xd62d8de09f82ed4e, test_neon());
test_utils::ops::test_abs!(i8x32, 0xd62d8de09f82ed4e, test_neon());
test_utils::ops::test_splitjoin!(i8x32 => i8x16, 0x2e301b7e12090d5c, test_neon());
test_utils::ops::test_zipunzip!(i8x32 => i8x16, 0xc7e3a92f1d8b5604, test_neon());
}
mod test_i8x64 {
use super::*;
standard_tests!(i8x64, i8, 64);
test_utils::ops::test_bitops!(i8x64, 0xd62d8de09f82ed4e, test_neon());
test_utils::ops::test_abs!(i8x64, 0xd62d8de09f82ed4e, test_neon());
test_utils::ops::test_splitjoin!(i8x64 => i8x32, 0x2e301b7e12090d5c, test_neon());
}
mod test_i16x16 {
use super::*;
standard_tests!(i16x16, i16, 16);
test_utils::ops::test_bitops!(i16x16, 0x9167644fc4ad5cfa, test_neon());
test_utils::ops::test_abs!(i16x16, 0x9167644fc4ad5cfa, test_neon());
test_utils::ops::test_splitjoin!(i16x16 => i16x8, 0x2e301b7e12090d5c, test_neon());
test_utils::ops::test_zipunzip!(i16x16 => i16x8, 0x3f84d1b6e7a20c59, test_neon());
}
mod test_i16x32 {
use super::*;
standard_tests!(i16x32, i16, 32);
test_utils::ops::test_bitops!(i16x32, 0x9167644fc4ad5cfa, test_neon());
test_utils::ops::test_abs!(i16x32, 0x9167644fc4ad5cfa, test_neon());
test_utils::ops::test_splitjoin!(i16x32 => i16x16, 0x2e301b7e12090d5c, test_neon());
}
// The `i32` modules also host the dot-product tests, since `i32` vectors are the result type
// for the (i16, i16), (u8, i8) and (i8, u8) operand pairs.
mod test_i32x8 {
use super::*;
standard_tests!(i32x8, i32, 8);
test_utils::ops::test_bitops!(i32x8, 0xc4491a44af4aa58e, test_neon());
test_utils::ops::test_abs!(i32x8, 0xc4491a44af4aa58e, test_neon());
test_utils::dot_product::test_dot_product!(
(i16x16, i16x16) => i32x8,
0x145f89b446c03ff1,
test_neon()
);
test_utils::dot_product::test_dot_product!(
(u8x32, i8x32) => i32x8,
0x145f89b446c03ff1,
test_neon()
);
test_utils::dot_product::test_dot_product!(
(i8x32, u8x32) => i32x8,
0x145f89b446c03ff1,
test_neon()
);
test_utils::ops::test_sumtree!(i32x8, 0x90a59e23ad545de1, test_neon());
test_utils::ops::test_splitjoin!(i32x8 => i32x4, 0x2e301b7e12090d5c, test_neon());
test_utils::ops::test_zipunzip!(i32x8 => i32x4, 0x92d5f4a83e1b07c6, test_neon());
}
mod test_i32x16 {
use super::*;
standard_tests!(i32x16, i32, 16);
test_utils::ops::test_bitops!(i32x16, 0xc4491a44af4aa58e, test_neon());
test_utils::ops::test_abs!(i32x16, 0xc4491a44af4aa58e, test_neon());
test_utils::dot_product::test_dot_product!(
(i16x32, i16x32) => i32x16,
0x145f89b446c03ff1,
test_neon()
);
test_utils::dot_product::test_dot_product!(
(u8x64, i8x64) => i32x16,
0x145f89b446c03ff1,
test_neon()
);
test_utils::dot_product::test_dot_product!(
(i8x64, u8x64) => i32x16,
0x145f89b446c03ff1,
test_neon()
);
test_utils::ops::test_sumtree!(i32x16, 0x90a59e23ad545de1, test_neon());
test_utils::ops::test_splitjoin!(i32x16 => i32x8, 0x2e301b7e12090d5c, test_neon());
}
// Conversion tests: one per `From` impl defined in this file.
test_utils::ops::test_lossless_convert!(f16x8 => f32x8, 0x84c1c6f05b169a20, test_neon());
test_utils::ops::test_lossless_convert!(f16x16 => f32x16, 0x84c1c6f05b169a20, test_neon());
test_utils::ops::test_lossless_convert!(u8x16 => i16x16, 0x84c1c6f05b169a20, test_neon());
test_utils::ops::test_lossless_convert!(u8x32 => i16x32, 0x84c1c6f05b169a20, test_neon());
test_utils::ops::test_lossless_convert!(i8x16 => i16x16, 0x84c1c6f05b169a20, test_neon());
test_utils::ops::test_lossless_convert!(i8x32 => i16x32, 0x84c1c6f05b169a20, test_neon());
// Cast tests.
// NOTE(review): the `f16x8 => f32x8` cast has no `SIMDCast` impl in the visible part of this
// file — presumably it is implemented elsewhere; confirm.
test_utils::ops::test_cast!(f16x8 => f32x8, 0xba8fe343fc9dbeff, test_neon());
test_utils::ops::test_cast!(f16x16 => f32x16, 0xba8fe343fc9dbeff, test_neon());
test_utils::ops::test_cast!(f32x8 => f16x8, 0xba8fe343fc9dbeff, test_neon());
test_utils::ops::test_cast!(f32x16 => f16x16, 0xba8fe343fc9dbeff, test_neon());
test_utils::ops::test_cast!(i32x8 => f32x8, 0xba8fe343fc9dbeff, test_neon());
// `f16x16` gets no `standard_tests!` module; only its split/join and zip/unzip are tested here.
test_utils::ops::test_splitjoin!(f16x16 => f16x8, 0x2e301b7e12090d5c, test_neon());
test_utils::ops::test_zipunzip!(f16x16 => f16x8, 0x6b2e0f9d8a41c573, test_neon());
}