use crate::SIMDVector;
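/// Defines a `#[repr(transparent)]` newtype over a raw AArch64 NEON register
/// type and implements [`SIMDVector`] for it, delegating splatting and
/// load/store to the `AArchSplat` and `AArchLoadStore` helper traits below.
///
/// A hypothetical invocation (the wrapper, mask, and arch names here are
/// illustrative; only the intrinsic type comes from `std::arch::aarch64`):
///
/// ```ignore
/// aarch64_define_register!(f32x4, float32x4_t, mask32x4, f32, 4, Neon);
/// ```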
macro_rules! aarch64_define_register {
($type:ident, $impl:ty, $mask:ty, $scalar:ty, $lanes:literal, $arch:ty) => {
#[derive(Debug, Clone, Copy)]
#[allow(non_camel_case_types)]
#[repr(transparent)]
pub struct $type(pub $impl);
impl $type {
#[inline(always)]
pub fn emulated(self) -> $crate::Emulated<$scalar, $lanes> {
$crate::Emulated::from_array($crate::arch::Scalar, self.to_array())
}
}
impl $crate::AsSIMD<$type> for $crate::Emulated<$scalar, $lanes> {
#[inline(always)]
fn as_simd(self, arch: $arch) -> $type {
$type::from_array(arch, self.to_array())
}
}
impl SIMDVector for $type {
type Arch = $arch;
type Scalar = $scalar;
type Underlying = $impl;
type Mask = $mask;
type ConstLanes = Const<$lanes>;
const LANES: usize = $lanes;
const EMULATED: bool = false;
#[inline(always)]
fn arch(self) -> $arch {
// Safety: a value of this register type can only be constructed when
// the target architecture is available, so recreating the arch token
// here is sound.
unsafe { <$arch>::new() }
}
#[inline(always)]
fn default(arch: $arch) -> Self {
<Self as AArchSplat>::aarch_default(arch)
}
#[inline(always)]
fn to_underlying(self) -> Self::Underlying {
self.0
}
#[inline(always)]
fn from_underlying(_: $arch, repr: Self::Underlying) -> Self {
Self(repr)
}
#[inline(always)]
fn to_array(self) -> [$scalar; $lanes] {
// Safety: `Self` is `#[repr(transparent)]` over the register type,
// which has the same size as `[$scalar; $lanes]`.
unsafe { std::mem::transmute::<Self, [$scalar; $lanes]>(self) }
}
#[inline(always)]
fn from_array(_: $arch, x: [$scalar; $lanes]) -> Self {
unsafe { std::mem::transmute::<[$scalar; $lanes], Self>(x) }
}
#[inline(always)]
fn splat(arch: $arch, value: Self::Scalar) -> Self {
<Self as AArchSplat>::aarch_splat(arch, value)
}
#[inline(always)]
unsafe fn load_simd(arch: $arch, ptr: *const $scalar) -> Self {
unsafe { <Self as AArchLoadStore>::load_simd(arch, ptr) }
}
#[inline(always)]
unsafe fn load_simd_masked_logical(
arch: $arch,
ptr: *const $scalar,
mask: $mask,
) -> Self {
unsafe { <Self as AArchLoadStore>::load_simd_masked_logical(arch, ptr, mask) }
}
#[inline(always)]
unsafe fn load_simd_first(arch: $arch, ptr: *const $scalar, first: usize) -> Self {
unsafe { <Self as AArchLoadStore>::load_simd_first(arch, ptr, first) }
}
#[inline(always)]
unsafe fn store_simd(self, ptr: *mut $scalar) {
unsafe { <Self as AArchLoadStore>::store_simd(self, ptr) }
}
#[inline(always)]
unsafe fn store_simd_masked_logical(self, ptr: *mut $scalar, mask: $mask) {
unsafe { <Self as AArchLoadStore>::store_simd_masked_logical(self, ptr, mask) }
}
#[inline(always)]
unsafe fn store_simd_first(self, ptr: *mut $scalar, first: usize) {
unsafe { <Self as AArchLoadStore>::store_simd_first(self, ptr, first) }
}
}
};
}
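/// Per-register-type hook for broadcasting a scalar across all lanes, kept
/// separate from [`SIMDVector`] so that `aarch64_define_register!` can stay
/// generic over the concrete `vdupq`-style duplicate intrinsic.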
pub(super) trait AArchSplat: SIMDVector {
fn aarch_splat(arch: <Self as SIMDVector>::Arch, value: <Self as SIMDVector>::Scalar) -> Self;
fn aarch_default(arch: <Self as SIMDVector>::Arch) -> Self;
}
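/// Per-register-type hooks for full, masked, and length-limited ("first
/// `first` lanes") loads and stores. NEON has no predicated memory
/// instructions, so the masked variants are implemented via the scalar
/// emulation backend.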
pub(super) trait AArchLoadStore: SIMDVector {
unsafe fn load_simd(
arch: <Self as SIMDVector>::Arch,
ptr: *const <Self as SIMDVector>::Scalar,
) -> Self;
unsafe fn load_simd_masked_logical(
arch: <Self as SIMDVector>::Arch,
ptr: *const <Self as SIMDVector>::Scalar,
mask: Self::Mask,
) -> Self;
unsafe fn load_simd_first(
arch: <Self as SIMDVector>::Arch,
ptr: *const <Self as SIMDVector>::Scalar,
first: usize,
) -> Self;
unsafe fn store_simd(self, ptr: *mut <Self as SIMDVector>::Scalar);
unsafe fn store_simd_masked_logical(
self,
ptr: *mut <Self as SIMDVector>::Scalar,
mask: Self::Mask,
);
unsafe fn store_simd_first(self, ptr: *mut <Self as SIMDVector>::Scalar, first: usize);
}
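/// Implements [`AArchSplat`] in terms of a single duplicate intrinsic
/// (e.g. `vdupq_n_f32`); `aarch_default` splats the scalar's `Default`
/// value.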
macro_rules! aarch64_define_splat {
($type:ty, $intrinsic:expr) => {
impl AArchSplat for $type {
#[inline(always)]
fn aarch_splat(
_arch: <Self as SIMDVector>::Arch,
value: <Self as SIMDVector>::Scalar,
) -> Self {
Self(unsafe { $intrinsic(value) })
}
#[inline(always)]
fn aarch_default(arch: <Self as SIMDVector>::Arch) -> Self {
Self::aarch_splat(arch, <Self as SIMDVector>::Scalar::default())
}
}
};
}
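/// Implements [`AArchLoadStore`] from a plain load intrinsic, a
/// length-limited load helper, and a plain store intrinsic. The masked and
/// partial stores fall back to the `Emulated` backend, since NEON (unlike
/// SVE) has no predicated loads or stores.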
macro_rules! aarch64_define_loadstore {
($type:ty, $load:expr, $load_first:expr, $store:expr, $lanes:literal) => {
impl AArchLoadStore for $type {
#[inline(always)]
unsafe fn load_simd(
_arch: <Self as SIMDVector>::Arch,
ptr: *const <Self as SIMDVector>::Scalar,
) -> Self {
Self(unsafe { $load(ptr) })
}
#[inline(always)]
unsafe fn load_simd_masked_logical(
arch: <Self as SIMDVector>::Arch,
ptr: *const <Self as SIMDVector>::Scalar,
mask: Self::Mask,
) -> Self {
// NEON has no masked load instruction, so fall back to the scalar
// emulation backend and transfer the result into a register.
let e = unsafe {
$crate::Emulated::<_, $lanes>::load_simd_masked_logical(
$crate::arch::Scalar,
ptr,
mask.bitmask().as_scalar(),
)
};
Self::from_array(arch, e.to_array())
}
#[inline(always)]
unsafe fn load_simd_first(
arch: <Self as SIMDVector>::Arch,
ptr: *const <Self as SIMDVector>::Scalar,
first: usize,
) -> Self {
Self(unsafe { ($load_first)(arch, ptr, first) })
}
#[inline(always)]
unsafe fn store_simd(self, ptr: *mut <Self as SIMDVector>::Scalar) {
unsafe { $store(ptr, self.0) }
}
#[inline(always)]
unsafe fn store_simd_masked_logical(
self,
ptr: *mut <Self as SIMDVector>::Scalar,
mask: Self::Mask,
) {
// Masked stores are likewise unavailable on NEON; spill through the
// scalar emulation backend.
let e = $crate::Emulated::<_, $lanes>::from_array($crate::arch::Scalar, self.to_array());
unsafe { e.store_simd_masked_logical(ptr, mask.bitmask().as_scalar()) }
}
#[inline(always)]
unsafe fn store_simd_first(self, ptr: *mut <Self as SIMDVector>::Scalar, first: usize) {
// Partial stores also go through the emulation backend.
let e = $crate::Emulated::<_, $lanes>::from_array($crate::arch::Scalar, self.to_array());
unsafe { e.store_simd_first(ptr, first) }
}
}
};
}
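/// Implements `SIMDPartialEq` and `SIMDPartialOrd` from the NEON compare
/// intrinsics (`vceqq`/`vcltq`-style); `ne_simd` is derived by inverting the
/// equality mask with the caller-supplied `$not` expression.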
macro_rules! aarch64_define_cmp {
($type:ty, $eq:ident, ($not:expr), $lt:ident, $le:ident, $gt:ident, $ge:ident) => {
impl SIMDPartialEq for $type {
#[inline(always)]
fn eq_simd(self, other: Self) -> Self::Mask {
Self::Mask::from_underlying(self.arch(), unsafe { $eq(self.0, other.0) })
}
#[inline(always)]
fn ne_simd(self, other: Self) -> Self::Mask {
Self::Mask::from_underlying(self.arch(), unsafe { $not($eq(self.0, other.0)) })
}
}
impl SIMDPartialOrd for $type {
#[inline(always)]
fn lt_simd(self, other: Self) -> Self::Mask {
Self::Mask::from_underlying(self.arch(), unsafe { $lt(self.0, other.0) })
}
#[inline(always)]
fn le_simd(self, other: Self) -> Self::Mask {
Self::Mask::from_underlying(self.arch(), unsafe { $le(self.0, other.0) })
}
#[inline(always)]
fn gt_simd(self, other: Self) -> Self::Mask {
Self::Mask::from_underlying(self.arch(), unsafe { $gt(self.0, other.0) })
}
#[inline(always)]
fn ge_simd(self, other: Self) -> Self::Mask {
Self::Mask::from_underlying(self.arch(), unsafe { $ge(self.0, other.0) })
}
}
};
}
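/// Implements `SIMDMulAdd`. The `integer` arm lowers to a separate multiply
/// and add (which the backend may fuse into an `MLA`); the expression arm
/// forwards to a fused intrinsic such as `vfmaq_f32`, whose first operand is
/// the addend.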
macro_rules! aarch64_define_fma {
($type:ty, integer) => {
impl SIMDMulAdd for $type {
#[inline(always)]
fn mul_add_simd(self, rhs: Self, accumulator: Self) -> Self {
self * rhs + accumulator
}
}
};
($type:ty, $intrinsic:expr) => {
impl SIMDMulAdd for $type {
#[inline(always)]
fn mul_add_simd(self, rhs: Self, accumulator: Self) -> Self {
Self(unsafe { $intrinsic(accumulator.0, self.0, rhs.0) })
}
}
};
}
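/// Implements the bitwise operators plus vector and scalar shifts. NEON only
/// provides a variable shift-left (`vshlq`-family), which shifts right when
/// given a negative per-lane count, so right shifts clamp the count to the
/// maximum legal shift and negate it. Under Miri the shifts route through
/// the emulated backend, since these intrinsics are not supported there.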
macro_rules! aarch64_define_bitops {
($type:ty,
$not:ident,
$and:ident,
$or:ident,
$xor:ident,
($shlv:ident, $mask:literal, $neg:ident, $min:ident, $cvtpost:path, $cvtpre:path),
($unsigned:ty, $signed:ty, $broadcast_signed:ident),
) => {
impl std::ops::Not for $type {
type Output = Self;
#[inline(always)]
fn not(self) -> Self {
Self(unsafe { $not(self.0) })
}
}
impl std::ops::BitAnd for $type {
type Output = Self;
#[inline(always)]
fn bitand(self, rhs: Self) -> Self {
Self(unsafe { $and(self.0, rhs.0) })
}
}
impl std::ops::BitOr for $type {
type Output = Self;
#[inline(always)]
fn bitor(self, rhs: Self) -> Self {
Self(unsafe { $or(self.0, rhs.0) })
}
}
impl std::ops::BitXor for $type {
type Output = Self;
#[inline(always)]
fn bitxor(self, rhs: Self) -> Self {
Self(unsafe { $xor(self.0, rhs.0) })
}
}
impl std::ops::Shr for $type {
type Output = Self;
#[inline(always)]
fn shr(self, rhs: Self) -> Self {
use $crate::AsSIMD;
if cfg!(miri) {
self.emulated().shr(rhs.emulated()).as_simd(self.arch())
} else {
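// NEON has no variable right-shift: `vshl` shifts right when the
// per-lane count is negative, so clamp the requested amount and
// negate it.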
Self(unsafe {
$shlv(
self.0,
$neg($cvtpost($min(
$cvtpre(rhs.0),
$cvtpre(<$type as SIMDVector>::splat(self.arch(), $mask).0),
))),
)
})
}
}
}
impl std::ops::Shl for $type {
type Output = Self;
#[inline(always)]
fn shl(self, rhs: Self) -> Self {
use $crate::AsSIMD;
if cfg!(miri) {
self.emulated().shl(rhs.emulated()).as_simd(self.arch())
} else {
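// Clamp the per-lane shift count to the maximum legal shift so
// oversized shifts stay well-defined.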
Self(unsafe {
$shlv(
self.0,
$cvtpost($min(
$cvtpre(rhs.0),
$cvtpre(<$type as SIMDVector>::splat(self.arch(), $mask).0),
)),
)
})
}
}
}
impl std::ops::Shr<<$type as SIMDVector>::Scalar> for $type {
type Output = Self;
#[inline(always)]
fn shr(self, rhs: <$type as SIMDVector>::Scalar) -> Self {
use $crate::AsSIMD;
if cfg!(miri) {
self.emulated().shr(rhs).as_simd(self.arch())
} else {
Self(unsafe {
$shlv(
self.0,
$broadcast_signed(-((rhs as $unsigned).min($mask) as $signed)),
)
})
}
}
}
impl std::ops::Shl<<$type as SIMDVector>::Scalar> for $type {
type Output = Self;
#[inline(always)]
fn shl(self, rhs: <$type as SIMDVector>::Scalar) -> Self {
use $crate::AsSIMD;
if cfg!(miri) {
self.emulated().shl(rhs).as_simd(self.arch())
} else {
Self(unsafe {
$shlv(
self.0,
$broadcast_signed((rhs as $unsigned).min($mask) as $signed),
)
})
}
}
}
};
}
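/// Implements [`SplitJoin`](crate::SplitJoin) for a full-width register in
/// terms of NEON's `vget_low_*`/`vget_high_*`/`vcombine_*`-style intrinsics.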
macro_rules! aarch64_splitjoin {
($type:path, $half:path, $getlo:ident, $gethi:ident, $join:ident) => {
impl $crate::SplitJoin for $type {
type Halved = $half;
#[inline(always)]
fn split(self) -> $crate::LoHi<Self::Halved> {
unsafe {
$crate::LoHi::new(
Self::Halved::from_underlying(self.arch(), $getlo(self.to_underlying())),
Self::Halved::from_underlying(self.arch(), $gethi(self.to_underlying())),
)
}
}
#[inline(always)]
fn join(lohi: $crate::LoHi<Self::Halved>) -> Self {
unsafe {
Self::from_underlying(
lohi.lo.arch(),
$join(lohi.lo.to_underlying(), lohi.hi.to_underlying()),
)
}
}
}
};
}
pub(crate) use aarch64_define_bitops;
pub(crate) use aarch64_define_cmp;
pub(crate) use aarch64_define_fma;
pub(crate) use aarch64_define_loadstore;
pub(crate) use aarch64_define_register;
pub(crate) use aarch64_define_splat;
pub(crate) use aarch64_splitjoin;
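/// Implements [`ZipUnzip`](crate::ZipUnzip) for a `Doubled` pair of
/// registers using NEON's permute intrinsics (`vzip1q`/`vzip2q`,
/// `vuzp1q`/`vuzp2q`-style): `zip` interleaves the two halves, `unzip`
/// separates even and odd lanes back out.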
macro_rules! aarch64_zipunzip {
($half:path, $zip1:ident, $zip2:ident, $uzp1:ident, $uzp2:ident) => {
impl $crate::ZipUnzip for $crate::doubled::Doubled<$half> {
#[inline(always)]
fn zip(halves: $crate::LoHi<<Self as $crate::SplitJoin>::Halved>) -> Self {
use $crate::SIMDVector;
unsafe {
let lo_raw = halves.lo.to_underlying();
let hi_raw = halves.hi.to_underlying();
$crate::doubled::Doubled(
<$half>::from_underlying(halves.lo.arch(), $zip1(lo_raw, hi_raw)),
<$half>::from_underlying(halves.lo.arch(), $zip2(lo_raw, hi_raw)),
)
}
}
#[inline(always)]
fn unzip(self) -> $crate::LoHi<<Self as $crate::SplitJoin>::Halved> {
use $crate::SIMDVector;
unsafe {
let lo_raw = self.0.to_underlying();
let hi_raw = self.1.to_underlying();
$crate::LoHi::new(
<$half>::from_underlying(self.0.arch(), $uzp1(lo_raw, hi_raw)),
<$half>::from_underlying(self.0.arch(), $uzp2(lo_raw, hi_raw)),
)
}
}
}
};
}
pub(crate) use aarch64_zipunzip;