#![cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
use core::arch::aarch64::{self as arch, *};
// Generates reference-taking wrappers around the NEON load/store intrinsics of the same name
// in `arch`.
//
// Public form:
//
//     vld_n_replicate_k! {
//         unsafe: load;            // or `store`
//         size: size_check_macro;  // invoked inside every generated function
//         fn name(_: &[Elem; LANES][..REGISTERS] as MemTy) -> VecTy;
//         // ... more declarations ...
//     }
//
// * `load` declarations expand to `pub fn name(from: &MemTy) -> VecTy`.
// * `store` declarations expand to `pub fn name(into: &mut MemTy, val: VecTy)`; the type
//   written after `->` is reused as the type of the value being stored.
//
// The size-check macro is called as
// `check!(REGISTERS registers [[Elem; LANES]; REGISTERS] as MemTy)`.
//
// Every generated function carries `#[target_feature(enable = "neon")]`, so callers outside
// a `neon`-enabled function need an `unsafe` block to call it.
macro_rules! vld_n_replicate_k {
    // Internal rule: emit a single load wrapper.
    (@ load $(#[$attr:meta])*
        $name:ident: ([$elem:ty; $lanes:literal][..$count:literal] | $memory:ty) -> $vector:ty
        $([$check:ident])?
    ) => {
        $(#[$attr])*
        #[inline]
        #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
        #[target_feature(enable = "neon")]
        pub fn $name(from: &$memory) -> $vector {
            $(
                $check!($count registers [[$elem; $lanes]; $count] as $memory);
            )?
            let source: *const $memory = from;
            // SAFETY: `source` comes from a live shared reference, so it is non-null, aligned
            // for `$memory` and readable for `size_of::<$memory>()` bytes.
            // NOTE(review): that the intrinsic reads no more than that is assumed from the
            // declaration (and the size-check macro, when one is given) — confirm against the
            // intrinsic's documentation when adding entries.
            unsafe { arch::$name(source.cast()) }
        }
    };
    // Internal rule: emit a single store wrapper.
    (@ store $(#[$attr:meta])*
        $name:ident: ([$elem:ty; $lanes:literal][..$count:literal] | $memory:ty) -> $vector:ty
        $([$check:ident])?
    ) => {
        $(#[$attr])*
        #[inline]
        #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
        #[target_feature(enable = "neon")]
        pub fn $name(into: &mut $memory, val: $vector) {
            $(
                $check!($count registers [[$elem; $lanes]; $count] as $memory);
            )?
            let sink: *mut $memory = into;
            // SAFETY: `sink` comes from a live exclusive reference, so it is non-null, aligned
            // for `$memory` and writable for `size_of::<$memory>()` bytes.
            // NOTE(review): that the intrinsic writes no more than that is assumed from the
            // declaration (and the size-check macro, when one is given) — confirm against the
            // intrinsic's documentation when adding entries.
            unsafe { arch::$name(sink.cast(), val) }
        }
    };
    // Public entry point: forwards every declaration to the `@ load` / `@ store` rule selected
    // by the identifier after `unsafe:`.
    (
        // So we have one unsafe keyword in the pre-expansion.
        unsafe: $mode:ident;
        size: $check:ident;
        $(
            $(#[$attr:meta])* fn $name:ident(_: &[$elem:ty; $lanes:literal][..$count:literal] as $memory:ty) -> $vector:ty;
        )*
    ) => {
        $(
            vld_n_replicate_k!(
                @ $mode $(#[$attr])* $name: ([$elem; $lanes][..$count] | $memory) -> $vector [$check]
            );
        )*
    };
}
// Compile-time layout check for wrappers around 8-byte vector registers.
//
// Invoked as `assert_size_8bytes!(COUNT registers Layout as Actual)`; compilation fails unless
// both `Layout` and `Actual` occupy exactly `8 * COUNT` bytes.
macro_rules! assert_size_8bytes {
    ($count:literal registers $layout:ty as $actual:ty) => {
        const _: () = {
            ::core::assert!(::core::mem::size_of::<$layout>() == 8 * $count);
            ::core::assert!(::core::mem::size_of::<$actual>() == 8 * $count);
        };
    };
}
// Compile-time layout check for wrappers around 16-byte vector registers.
//
// Invoked as `assert_size_16bytes!(COUNT registers Layout as Actual)`; compilation fails unless
// both `Layout` and `Actual` occupy exactly `16 * COUNT` bytes.
macro_rules! assert_size_16bytes {
    ($count:literal registers $layout:ty as $actual:ty) => {
        const _: () = {
            ::core::assert!(::core::mem::size_of::<$layout>() == 16 * $count);
            ::core::assert!(::core::mem::size_of::<$actual>() == 16 * $count);
        };
    };
}
// Size check for wrappers whose memory operand is not a whole number of vector registers.
//
// Invoked as `various_sizes!(COUNT registers Layout as Actual)`. `COUNT` is accepted only so
// the call shape matches the other size-check macros; it is not used.
//
// Test builds: compilation fails unless `Layout` and `Actual` have the same size. All paths
// are rooted at `::core` so an item named `core` at the call site cannot capture them.
#[cfg(test)]
macro_rules! various_sizes {
    ($n:literal registers $ty:ty as $real:ty) => {
        const _: () = ::core::assert!(
            ::core::mem::size_of::<$ty>() == ::core::mem::size_of::<$real>()
        );
    };
}
// Non-test builds: expands to nothing, so the size comparison is only performed when the
// crate is compiled for testing.
#[cfg(not(test))]
macro_rules! various_sizes {
    ($n:literal registers $ty:ty as $real:ty) => {};
}
// Load wrappers for the `vld1_*` family (8-byte vector types), in single-register form and
// the `_x2` / `_x3` / `_x4` multi-register forms.
//
// Each entry reads `&[element; lanes][..registers] as <reference type> -> <vector type>` and
// expands to `pub fn name(from: &<reference type>) -> <vector type>`. `assert_size_8bytes`
// rejects, at compile time, any entry whose reference type is not `8 * registers` bytes.
vld_n_replicate_k! {
unsafe: load;
size: assert_size_8bytes;
// One register. The 64-bit element variants take a bare scalar reference.
fn vld1_u8(_: &[u8; 8][..1] as [u8; 8]) -> uint8x8_t;
fn vld1_s8(_: &[i8; 8][..1] as [i8; 8]) -> int8x8_t;
fn vld1_u16(_: &[u16; 4][..1] as [u16; 4]) -> uint16x4_t;
fn vld1_s16(_: &[i16; 4][..1] as [i16; 4]) -> int16x4_t;
fn vld1_u32(_: &[u32; 2][..1] as [u32; 2]) -> uint32x2_t;
fn vld1_s32(_: &[i32; 2][..1] as [i32; 2]) -> int32x2_t;
fn vld1_f32(_: &[f32; 2][..1] as [f32; 2]) -> float32x2_t;
fn vld1_u64(_: &[u64; 1][..1] as u64) -> uint64x1_t;
fn vld1_s64(_: &[i64; 1][..1] as i64) -> int64x1_t;
fn vld1_f64(_: &[f64; 1][..1] as f64) -> float64x1_t;
// Two registers. The 64-bit element variants take a flat `[T; 2]`.
fn vld1_u8_x2(_: &[u8; 8][..2] as [[u8; 8]; 2]) -> uint8x8x2_t;
fn vld1_s8_x2(_: &[i8; 8][..2] as [[i8; 8]; 2]) -> int8x8x2_t;
fn vld1_u16_x2(_: &[u16; 4][..2] as [[u16; 4]; 2]) -> uint16x4x2_t;
fn vld1_s16_x2(_: &[i16; 4][..2] as [[i16; 4]; 2]) -> int16x4x2_t;
fn vld1_u32_x2(_: &[u32; 2][..2] as [[u32; 2]; 2]) -> uint32x2x2_t;
fn vld1_s32_x2(_: &[i32; 2][..2] as [[i32; 2]; 2]) -> int32x2x2_t;
fn vld1_f32_x2(_: &[f32; 2][..2] as [[f32; 2]; 2]) -> float32x2x2_t;
fn vld1_u64_x2(_: &[u64; 1][..2] as [u64; 2]) -> uint64x1x2_t;
fn vld1_s64_x2(_: &[i64; 1][..2] as [i64; 2]) -> int64x1x2_t;
fn vld1_f64_x2(_: &[f64; 1][..2] as [f64; 2]) -> float64x1x2_t;
// Three registers.
fn vld1_u8_x3(_: &[u8; 8][..3] as [[u8; 8]; 3]) -> uint8x8x3_t;
fn vld1_s8_x3(_: &[i8; 8][..3] as [[i8; 8]; 3]) -> int8x8x3_t;
fn vld1_u16_x3(_: &[u16; 4][..3] as [[u16; 4]; 3]) -> uint16x4x3_t;
fn vld1_s16_x3(_: &[i16; 4][..3] as [[i16; 4]; 3]) -> int16x4x3_t;
fn vld1_u32_x3(_: &[u32; 2][..3] as [[u32; 2]; 3]) -> uint32x2x3_t;
fn vld1_s32_x3(_: &[i32; 2][..3] as [[i32; 2]; 3]) -> int32x2x3_t;
fn vld1_f32_x3(_: &[f32; 2][..3] as [[f32; 2]; 3]) -> float32x2x3_t;
fn vld1_u64_x3(_: &[u64; 1][..3] as [u64; 3]) -> uint64x1x3_t;
fn vld1_s64_x3(_: &[i64; 1][..3] as [i64; 3]) -> int64x1x3_t;
fn vld1_f64_x3(_: &[f64; 1][..3] as [f64; 3]) -> float64x1x3_t;
// Four registers.
fn vld1_u8_x4(_: &[u8; 8][..4] as [[u8; 8]; 4]) -> uint8x8x4_t;
fn vld1_s8_x4(_: &[i8; 8][..4] as [[i8; 8]; 4]) -> int8x8x4_t;
fn vld1_u16_x4(_: &[u16; 4][..4] as [[u16; 4]; 4]) -> uint16x4x4_t;
fn vld1_s16_x4(_: &[i16; 4][..4] as [[i16; 4]; 4]) -> int16x4x4_t;
fn vld1_u32_x4(_: &[u32; 2][..4] as [[u32; 2]; 4]) -> uint32x2x4_t;
fn vld1_s32_x4(_: &[i32; 2][..4] as [[i32; 2]; 4]) -> int32x2x4_t;
fn vld1_f32_x4(_: &[f32; 2][..4] as [[f32; 2]; 4]) -> float32x2x4_t;
fn vld1_u64_x4(_: &[u64; 1][..4] as [u64; 4]) -> uint64x1x4_t;
fn vld1_s64_x4(_: &[i64; 1][..4] as [i64; 4]) -> int64x1x4_t;
fn vld1_f64_x4(_: &[f64; 1][..4] as [f64; 4]) -> float64x1x4_t;
}
// Load wrappers for the `vld1q_*` family (16-byte vector types), in single-register form and
// the `_x2` / `_x3` / `_x4` multi-register forms.
//
// Each entry expands to `pub fn name(from: &<reference type>) -> <vector type>`.
// `assert_size_16bytes` rejects, at compile time, any entry whose reference type is not
// `16 * registers` bytes.
vld_n_replicate_k! {
unsafe: load;
size: assert_size_16bytes;
// One register.
fn vld1q_u8(_: &[u8; 16][..1] as [u8; 16]) -> uint8x16_t;
fn vld1q_s8(_: &[i8; 16][..1] as [i8; 16]) -> int8x16_t;
fn vld1q_u16(_: &[u16; 8][..1] as [u16; 8]) -> uint16x8_t;
fn vld1q_s16(_: &[i16; 8][..1] as [i16; 8]) -> int16x8_t;
fn vld1q_u32(_: &[u32; 4][..1] as [u32; 4]) -> uint32x4_t;
fn vld1q_s32(_: &[i32; 4][..1] as [i32; 4]) -> int32x4_t;
fn vld1q_f32(_: &[f32; 4][..1] as [f32; 4]) -> float32x4_t;
fn vld1q_u64(_: &[u64; 2][..1] as [u64; 2]) -> uint64x2_t;
fn vld1q_s64(_: &[i64; 2][..1] as [i64; 2]) -> int64x2_t;
fn vld1q_f64(_: &[f64; 2][..1] as [f64; 2]) -> float64x2_t;
// Two registers.
fn vld1q_u8_x2(_: &[u8; 16][..2] as [[u8; 16]; 2]) -> uint8x16x2_t;
fn vld1q_s8_x2(_: &[i8; 16][..2] as [[i8; 16]; 2]) -> int8x16x2_t;
fn vld1q_u16_x2(_: &[u16; 8][..2] as [[u16; 8]; 2]) -> uint16x8x2_t;
fn vld1q_s16_x2(_: &[i16; 8][..2] as [[i16; 8]; 2]) -> int16x8x2_t;
fn vld1q_u32_x2(_: &[u32; 4][..2] as [[u32; 4]; 2]) -> uint32x4x2_t;
fn vld1q_s32_x2(_: &[i32; 4][..2] as [[i32; 4]; 2]) -> int32x4x2_t;
fn vld1q_f32_x2(_: &[f32; 4][..2] as [[f32; 4]; 2]) -> float32x4x2_t;
fn vld1q_u64_x2(_: &[u64; 2][..2] as [[u64; 2]; 2]) -> uint64x2x2_t;
fn vld1q_s64_x2(_: &[i64; 2][..2] as [[i64; 2]; 2]) -> int64x2x2_t;
fn vld1q_f64_x2(_: &[f64; 2][..2] as [[f64; 2]; 2]) -> float64x2x2_t;
// Three registers.
fn vld1q_u8_x3(_: &[u8; 16][..3] as [[u8; 16]; 3]) -> uint8x16x3_t;
fn vld1q_s8_x3(_: &[i8; 16][..3] as [[i8; 16]; 3]) -> int8x16x3_t;
fn vld1q_u16_x3(_: &[u16; 8][..3] as [[u16; 8]; 3]) -> uint16x8x3_t;
fn vld1q_s16_x3(_: &[i16; 8][..3] as [[i16; 8]; 3]) -> int16x8x3_t;
fn vld1q_u32_x3(_: &[u32; 4][..3] as [[u32; 4]; 3]) -> uint32x4x3_t;
fn vld1q_s32_x3(_: &[i32; 4][..3] as [[i32; 4]; 3]) -> int32x4x3_t;
fn vld1q_f32_x3(_: &[f32; 4][..3] as [[f32; 4]; 3]) -> float32x4x3_t;
fn vld1q_u64_x3(_: &[u64; 2][..3] as [[u64; 2]; 3]) -> uint64x2x3_t;
fn vld1q_s64_x3(_: &[i64; 2][..3] as [[i64; 2]; 3]) -> int64x2x3_t;
fn vld1q_f64_x3(_: &[f64; 2][..3] as [[f64; 2]; 3]) -> float64x2x3_t;
// Four registers.
fn vld1q_u8_x4(_: &[u8; 16][..4] as [[u8; 16]; 4]) -> uint8x16x4_t;
fn vld1q_s8_x4(_: &[i8; 16][..4] as [[i8; 16]; 4]) -> int8x16x4_t;
fn vld1q_u16_x4(_: &[u16; 8][..4] as [[u16; 8]; 4]) -> uint16x8x4_t;
fn vld1q_s16_x4(_: &[i16; 8][..4] as [[i16; 8]; 4]) -> int16x8x4_t;
fn vld1q_u32_x4(_: &[u32; 4][..4] as [[u32; 4]; 4]) -> uint32x4x4_t;
fn vld1q_s32_x4(_: &[i32; 4][..4] as [[i32; 4]; 4]) -> int32x4x4_t;
fn vld1q_f32_x4(_: &[f32; 4][..4] as [[f32; 4]; 4]) -> float32x4x4_t;
fn vld1q_u64_x4(_: &[u64; 2][..4] as [[u64; 2]; 4]) -> uint64x2x4_t;
fn vld1q_s64_x4(_: &[i64; 2][..4] as [[i64; 2]; 4]) -> int64x2x4_t;
fn vld1q_f64_x4(_: &[f64; 2][..4] as [[f64; 2]; 4]) -> float64x2x4_t;
}
// Load wrappers for the `vld2q_*`, `vld3q_*` and `vld4q_*` families (two, three and four
// 16-byte registers respectively).
//
// Unlike the `vld1q_*_xN` wrappers, the reference type here is a single flat array of
// `lanes * registers` elements. `assert_size_16bytes` rejects, at compile time, any entry
// whose reference type is not `16 * registers` bytes.
vld_n_replicate_k! {
unsafe: load;
size: assert_size_16bytes;
// Two registers.
fn vld2q_u8(_: &[u8; 16][..2] as [u8; 32]) -> uint8x16x2_t;
fn vld2q_s8(_: &[i8; 16][..2] as [i8; 32]) -> int8x16x2_t;
fn vld2q_u16(_: &[u16; 8][..2] as [u16; 16]) -> uint16x8x2_t;
fn vld2q_s16(_: &[i16; 8][..2] as [i16; 16]) -> int16x8x2_t;
fn vld2q_u32(_: &[u32; 4][..2] as [u32; 8]) -> uint32x4x2_t;
fn vld2q_s32(_: &[i32; 4][..2] as [i32; 8]) -> int32x4x2_t;
fn vld2q_f32(_: &[f32; 4][..2] as [f32; 8]) -> float32x4x2_t;
fn vld2q_u64(_: &[u64; 2][..2] as [u64; 4]) -> uint64x2x2_t;
fn vld2q_s64(_: &[i64; 2][..2] as [i64; 4]) -> int64x2x2_t;
fn vld2q_f64(_: &[f64; 2][..2] as [f64; 4]) -> float64x2x2_t;
// Three registers.
fn vld3q_u8(_: &[u8; 16][..3] as [u8; 48]) -> uint8x16x3_t;
fn vld3q_s8(_: &[i8; 16][..3] as [i8; 48]) -> int8x16x3_t;
fn vld3q_u16(_: &[u16; 8][..3] as [u16; 24]) -> uint16x8x3_t;
fn vld3q_s16(_: &[i16; 8][..3] as [i16; 24]) -> int16x8x3_t;
fn vld3q_u32(_: &[u32; 4][..3] as [u32; 12]) -> uint32x4x3_t;
fn vld3q_s32(_: &[i32; 4][..3] as [i32; 12]) -> int32x4x3_t;
fn vld3q_f32(_: &[f32; 4][..3] as [f32; 12]) -> float32x4x3_t;
fn vld3q_u64(_: &[u64; 2][..3] as [u64; 6]) -> uint64x2x3_t;
fn vld3q_s64(_: &[i64; 2][..3] as [i64; 6]) -> int64x2x3_t;
fn vld3q_f64(_: &[f64; 2][..3] as [f64; 6]) -> float64x2x3_t;
// Four registers.
fn vld4q_u8(_: &[u8; 16][..4] as [u8; 64]) -> uint8x16x4_t;
fn vld4q_s8(_: &[i8; 16][..4] as [i8; 64]) -> int8x16x4_t;
fn vld4q_u16(_: &[u16; 8][..4] as [u16; 32]) -> uint16x8x4_t;
fn vld4q_s16(_: &[i16; 8][..4] as [i16; 32]) -> int16x8x4_t;
fn vld4q_u32(_: &[u32; 4][..4] as [u32; 16]) -> uint32x4x4_t;
fn vld4q_s32(_: &[i32; 4][..4] as [i32; 16]) -> int32x4x4_t;
fn vld4q_f32(_: &[f32; 4][..4] as [f32; 16]) -> float32x4x4_t;
fn vld4q_u64(_: &[u64; 2][..4] as [u64; 8]) -> uint64x2x4_t;
fn vld4q_s64(_: &[i64; 2][..4] as [i64; 8]) -> int64x2x4_t;
fn vld4q_f64(_: &[f64; 2][..4] as [f64; 8]) -> float64x2x4_t;
}
// Store wrappers for the `vst1_*` family (8-byte vector types), in single-register form and
// the `_x2` / `_x3` / `_x4` multi-register forms.
//
// The declaration syntax is shared with the load tables but is read differently in `store`
// mode: the type after `as` becomes the `&mut` destination and the type after `->` becomes
// the by-value vector argument, i.e. each entry expands to
// `pub fn name(into: &mut <reference type>, val: <vector type>)`.
// `assert_size_8bytes` rejects, at compile time, any entry whose destination type is not
// `8 * registers` bytes.
vld_n_replicate_k! {
unsafe: store;
size: assert_size_8bytes;
// One register. The 64-bit element variants take a bare scalar destination.
fn vst1_u8(_: &[u8; 8][..1] as [u8; 8]) -> uint8x8_t;
fn vst1_s8(_: &[i8; 8][..1] as [i8; 8]) -> int8x8_t;
fn vst1_u16(_: &[u16; 4][..1] as [u16; 4]) -> uint16x4_t;
fn vst1_s16(_: &[i16; 4][..1] as [i16; 4]) -> int16x4_t;
fn vst1_u32(_: &[u32; 2][..1] as [u32; 2]) -> uint32x2_t;
fn vst1_s32(_: &[i32; 2][..1] as [i32; 2]) -> int32x2_t;
fn vst1_f32(_: &[f32; 2][..1] as [f32; 2]) -> float32x2_t;
fn vst1_u64(_: &[u64; 1][..1] as u64) -> uint64x1_t;
fn vst1_s64(_: &[i64; 1][..1] as i64) -> int64x1_t;
fn vst1_f64(_: &[f64; 1][..1] as f64) -> float64x1_t;
// Two registers. The 64-bit element variants take a flat `[T; 2]`.
fn vst1_u8_x2(_: &[u8; 8][..2] as [[u8; 8]; 2]) -> uint8x8x2_t;
fn vst1_s8_x2(_: &[i8; 8][..2] as [[i8; 8]; 2]) -> int8x8x2_t;
fn vst1_u16_x2(_: &[u16; 4][..2] as [[u16; 4]; 2]) -> uint16x4x2_t;
fn vst1_s16_x2(_: &[i16; 4][..2] as [[i16; 4]; 2]) -> int16x4x2_t;
fn vst1_u32_x2(_: &[u32; 2][..2] as [[u32; 2]; 2]) -> uint32x2x2_t;
fn vst1_s32_x2(_: &[i32; 2][..2] as [[i32; 2]; 2]) -> int32x2x2_t;
fn vst1_f32_x2(_: &[f32; 2][..2] as [[f32; 2]; 2]) -> float32x2x2_t;
fn vst1_u64_x2(_: &[u64; 1][..2] as [u64; 2]) -> uint64x1x2_t;
fn vst1_s64_x2(_: &[i64; 1][..2] as [i64; 2]) -> int64x1x2_t;
fn vst1_f64_x2(_: &[f64; 1][..2] as [f64; 2]) -> float64x1x2_t;
// Three registers.
fn vst1_u8_x3(_: &[u8; 8][..3] as [[u8; 8]; 3]) -> uint8x8x3_t;
fn vst1_s8_x3(_: &[i8; 8][..3] as [[i8; 8]; 3]) -> int8x8x3_t;
fn vst1_u16_x3(_: &[u16; 4][..3] as [[u16; 4]; 3]) -> uint16x4x3_t;
fn vst1_s16_x3(_: &[i16; 4][..3] as [[i16; 4]; 3]) -> int16x4x3_t;
fn vst1_u32_x3(_: &[u32; 2][..3] as [[u32; 2]; 3]) -> uint32x2x3_t;
fn vst1_s32_x3(_: &[i32; 2][..3] as [[i32; 2]; 3]) -> int32x2x3_t;
fn vst1_f32_x3(_: &[f32; 2][..3] as [[f32; 2]; 3]) -> float32x2x3_t;
fn vst1_u64_x3(_: &[u64; 1][..3] as [u64; 3]) -> uint64x1x3_t;
fn vst1_s64_x3(_: &[i64; 1][..3] as [i64; 3]) -> int64x1x3_t;
fn vst1_f64_x3(_: &[f64; 1][..3] as [f64; 3]) -> float64x1x3_t;
// Four registers.
fn vst1_u8_x4(_: &[u8; 8][..4] as [[u8; 8]; 4]) -> uint8x8x4_t;
fn vst1_s8_x4(_: &[i8; 8][..4] as [[i8; 8]; 4]) -> int8x8x4_t;
fn vst1_u16_x4(_: &[u16; 4][..4] as [[u16; 4]; 4]) -> uint16x4x4_t;
fn vst1_s16_x4(_: &[i16; 4][..4] as [[i16; 4]; 4]) -> int16x4x4_t;
fn vst1_u32_x4(_: &[u32; 2][..4] as [[u32; 2]; 4]) -> uint32x2x4_t;
fn vst1_s32_x4(_: &[i32; 2][..4] as [[i32; 2]; 4]) -> int32x2x4_t;
fn vst1_f32_x4(_: &[f32; 2][..4] as [[f32; 2]; 4]) -> float32x2x4_t;
fn vst1_u64_x4(_: &[u64; 1][..4] as [u64; 4]) -> uint64x1x4_t;
fn vst1_s64_x4(_: &[i64; 1][..4] as [i64; 4]) -> int64x1x4_t;
fn vst1_f64_x4(_: &[f64; 1][..4] as [f64; 4]) -> float64x1x4_t;
}
// Store wrappers for the `vst1q_*` family (16-byte vector types), in single-register form
// and the `_x2` / `_x3` / `_x4` multi-register forms.
//
// In `store` mode each entry expands to
// `pub fn name(into: &mut <type after `as`>, val: <type after `->`>)`.
// `assert_size_16bytes` rejects, at compile time, any entry whose destination type is not
// `16 * registers` bytes.
vld_n_replicate_k! {
unsafe: store;
size: assert_size_16bytes;
// One register.
fn vst1q_u8(_: &[u8; 16][..1] as [u8; 16]) -> uint8x16_t;
fn vst1q_s8(_: &[i8; 16][..1] as [i8; 16]) -> int8x16_t;
fn vst1q_u16(_: &[u16; 8][..1] as [u16; 8]) -> uint16x8_t;
fn vst1q_s16(_: &[i16; 8][..1] as [i16; 8]) -> int16x8_t;
fn vst1q_u32(_: &[u32; 4][..1] as [u32; 4]) -> uint32x4_t;
fn vst1q_s32(_: &[i32; 4][..1] as [i32; 4]) -> int32x4_t;
fn vst1q_f32(_: &[f32; 4][..1] as [f32; 4]) -> float32x4_t;
fn vst1q_u64(_: &[u64; 2][..1] as [u64; 2]) -> uint64x2_t;
fn vst1q_s64(_: &[i64; 2][..1] as [i64; 2]) -> int64x2_t;
fn vst1q_f64(_: &[f64; 2][..1] as [f64; 2]) -> float64x2_t;
// Two registers.
fn vst1q_u8_x2(_: &[u8; 16][..2] as [[u8; 16]; 2]) -> uint8x16x2_t;
fn vst1q_s8_x2(_: &[i8; 16][..2] as [[i8; 16]; 2]) -> int8x16x2_t;
fn vst1q_u16_x2(_: &[u16; 8][..2] as [[u16; 8]; 2]) -> uint16x8x2_t;
fn vst1q_s16_x2(_: &[i16; 8][..2] as [[i16; 8]; 2]) -> int16x8x2_t;
fn vst1q_u32_x2(_: &[u32; 4][..2] as [[u32; 4]; 2]) -> uint32x4x2_t;
fn vst1q_s32_x2(_: &[i32; 4][..2] as [[i32; 4]; 2]) -> int32x4x2_t;
fn vst1q_f32_x2(_: &[f32; 4][..2] as [[f32; 4]; 2]) -> float32x4x2_t;
fn vst1q_u64_x2(_: &[u64; 2][..2] as [[u64; 2]; 2]) -> uint64x2x2_t;
fn vst1q_s64_x2(_: &[i64; 2][..2] as [[i64; 2]; 2]) -> int64x2x2_t;
fn vst1q_f64_x2(_: &[f64; 2][..2] as [[f64; 2]; 2]) -> float64x2x2_t;
// Three registers.
fn vst1q_u8_x3(_: &[u8; 16][..3] as [[u8; 16]; 3]) -> uint8x16x3_t;
fn vst1q_s8_x3(_: &[i8; 16][..3] as [[i8; 16]; 3]) -> int8x16x3_t;
fn vst1q_u16_x3(_: &[u16; 8][..3] as [[u16; 8]; 3]) -> uint16x8x3_t;
fn vst1q_s16_x3(_: &[i16; 8][..3] as [[i16; 8]; 3]) -> int16x8x3_t;
fn vst1q_u32_x3(_: &[u32; 4][..3] as [[u32; 4]; 3]) -> uint32x4x3_t;
fn vst1q_s32_x3(_: &[i32; 4][..3] as [[i32; 4]; 3]) -> int32x4x3_t;
fn vst1q_f32_x3(_: &[f32; 4][..3] as [[f32; 4]; 3]) -> float32x4x3_t;
fn vst1q_u64_x3(_: &[u64; 2][..3] as [[u64; 2]; 3]) -> uint64x2x3_t;
fn vst1q_s64_x3(_: &[i64; 2][..3] as [[i64; 2]; 3]) -> int64x2x3_t;
fn vst1q_f64_x3(_: &[f64; 2][..3] as [[f64; 2]; 3]) -> float64x2x3_t;
// Four registers.
fn vst1q_u8_x4(_: &[u8; 16][..4] as [[u8; 16]; 4]) -> uint8x16x4_t;
fn vst1q_s8_x4(_: &[i8; 16][..4] as [[i8; 16]; 4]) -> int8x16x4_t;
fn vst1q_u16_x4(_: &[u16; 8][..4] as [[u16; 8]; 4]) -> uint16x8x4_t;
fn vst1q_s16_x4(_: &[i16; 8][..4] as [[i16; 8]; 4]) -> int16x8x4_t;
fn vst1q_u32_x4(_: &[u32; 4][..4] as [[u32; 4]; 4]) -> uint32x4x4_t;
fn vst1q_s32_x4(_: &[i32; 4][..4] as [[i32; 4]; 4]) -> int32x4x4_t;
fn vst1q_f32_x4(_: &[f32; 4][..4] as [[f32; 4]; 4]) -> float32x4x4_t;
fn vst1q_u64_x4(_: &[u64; 2][..4] as [[u64; 2]; 4]) -> uint64x2x4_t;
fn vst1q_s64_x4(_: &[i64; 2][..4] as [[i64; 2]; 4]) -> int64x2x4_t;
fn vst1q_f64_x4(_: &[f64; 2][..4] as [[f64; 2]; 4]) -> float64x2x4_t;
}
// Store wrappers for the `vst2q_*`, `vst3q_*` and `vst4q_*` families (two, three and four
// 16-byte registers respectively).
//
// In `store` mode each entry expands to
// `pub fn name(into: &mut <type after `as`>, val: <type after `->`>)`. The destination is a
// single flat array of `lanes * registers` elements, and `assert_size_16bytes` rejects, at
// compile time, any entry whose destination type is not `16 * registers` bytes.
vld_n_replicate_k! {
unsafe: store;
size: assert_size_16bytes;
// Two registers.
fn vst2q_u8(_: &[u8; 16][..2] as [u8; 32]) -> uint8x16x2_t;
fn vst2q_s8(_: &[i8; 16][..2] as [i8; 32]) -> int8x16x2_t;
fn vst2q_u16(_: &[u16; 8][..2] as [u16; 16]) -> uint16x8x2_t;
fn vst2q_s16(_: &[i16; 8][..2] as [i16; 16]) -> int16x8x2_t;
fn vst2q_u32(_: &[u32; 4][..2] as [u32; 8]) -> uint32x4x2_t;
fn vst2q_s32(_: &[i32; 4][..2] as [i32; 8]) -> int32x4x2_t;
fn vst2q_f32(_: &[f32; 4][..2] as [f32; 8]) -> float32x4x2_t;
fn vst2q_u64(_: &[u64; 2][..2] as [u64; 4]) -> uint64x2x2_t;
fn vst2q_s64(_: &[i64; 2][..2] as [i64; 4]) -> int64x2x2_t;
fn vst2q_f64(_: &[f64; 2][..2] as [f64; 4]) -> float64x2x2_t;
// Three registers.
fn vst3q_u8(_: &[u8; 16][..3] as [u8; 48]) -> uint8x16x3_t;
fn vst3q_s8(_: &[i8; 16][..3] as [i8; 48]) -> int8x16x3_t;
fn vst3q_u16(_: &[u16; 8][..3] as [u16; 24]) -> uint16x8x3_t;
fn vst3q_s16(_: &[i16; 8][..3] as [i16; 24]) -> int16x8x3_t;
fn vst3q_u32(_: &[u32; 4][..3] as [u32; 12]) -> uint32x4x3_t;
fn vst3q_s32(_: &[i32; 4][..3] as [i32; 12]) -> int32x4x3_t;
fn vst3q_f32(_: &[f32; 4][..3] as [f32; 12]) -> float32x4x3_t;
fn vst3q_u64(_: &[u64; 2][..3] as [u64; 6]) -> uint64x2x3_t;
fn vst3q_s64(_: &[i64; 2][..3] as [i64; 6]) -> int64x2x3_t;
fn vst3q_f64(_: &[f64; 2][..3] as [f64; 6]) -> float64x2x3_t;
// Four registers.
fn vst4q_u8(_: &[u8; 16][..4] as [u8; 64]) -> uint8x16x4_t;
fn vst4q_s8(_: &[i8; 16][..4] as [i8; 64]) -> int8x16x4_t;
fn vst4q_u16(_: &[u16; 8][..4] as [u16; 32]) -> uint16x8x4_t;
fn vst4q_s16(_: &[i16; 8][..4] as [i16; 32]) -> int16x8x4_t;
fn vst4q_u32(_: &[u32; 4][..4] as [u32; 16]) -> uint32x4x4_t;
fn vst4q_s32(_: &[i32; 4][..4] as [i32; 16]) -> int32x4x4_t;
fn vst4q_f32(_: &[f32; 4][..4] as [f32; 16]) -> float32x4x4_t;
fn vst4q_u64(_: &[u64; 2][..4] as [u64; 8]) -> uint64x2x4_t;
fn vst4q_s64(_: &[i64; 2][..4] as [i64; 8]) -> int64x2x4_t;
fn vst4q_f64(_: &[f64; 2][..4] as [f64; 8]) -> float64x2x4_t;
}
// Load wrappers for the `vld1_dup_*` .. `vld4_dup_*` families returning 8-byte vector types.
//
// Here the memory operand is much smaller than the returned vectors: one scalar for
// `vld1_dup_*` and an array of 2, 3 or 4 elements for `vld2_dup_*` .. `vld4_dup_*`. Every
// entry is therefore declared as a single "register" (`[..1]`) whose lane count equals the
// number of elements in the reference type, and `various_sizes` is used instead of the
// fixed-size checks. Note that `various_sizes` only performs its comparison in test builds.
vld_n_replicate_k! {
unsafe: load;
size: various_sizes;
fn vld1_dup_s8(_: &[i8; 1][..1] as i8) -> int8x8_t;
fn vld2_dup_s8(_: &[i8; 2][..1] as [i8; 2]) -> int8x8x2_t;
fn vld3_dup_s8(_: &[i8; 3][..1] as [i8; 3]) -> int8x8x3_t;
fn vld4_dup_s8(_: &[i8; 4][..1] as [i8; 4]) -> int8x8x4_t;
fn vld1_dup_u8(_: &[u8; 1][..1] as u8) -> uint8x8_t;
fn vld2_dup_u8(_: &[u8; 2][..1] as [u8; 2]) -> uint8x8x2_t;
fn vld3_dup_u8(_: &[u8; 3][..1] as [u8; 3]) -> uint8x8x3_t;
fn vld4_dup_u8(_: &[u8; 4][..1] as [u8; 4]) -> uint8x8x4_t;
fn vld1_dup_s16(_: &[i16; 1][..1] as i16) -> int16x4_t;
fn vld2_dup_s16(_: &[i16; 2][..1] as [i16; 2]) -> int16x4x2_t;
fn vld3_dup_s16(_: &[i16; 3][..1] as [i16; 3]) -> int16x4x3_t;
fn vld4_dup_s16(_: &[i16; 4][..1] as [i16; 4]) -> int16x4x4_t;
fn vld1_dup_u16(_: &[u16; 1][..1] as u16) -> uint16x4_t;
fn vld2_dup_u16(_: &[u16; 2][..1] as [u16; 2]) -> uint16x4x2_t;
fn vld3_dup_u16(_: &[u16; 3][..1] as [u16; 3]) -> uint16x4x3_t;
fn vld4_dup_u16(_: &[u16; 4][..1] as [u16; 4]) -> uint16x4x4_t;
fn vld1_dup_s32(_: &[i32; 1][..1] as i32) -> int32x2_t;
fn vld2_dup_s32(_: &[i32; 2][..1] as [i32; 2]) -> int32x2x2_t;
fn vld3_dup_s32(_: &[i32; 3][..1] as [i32; 3]) -> int32x2x3_t;
fn vld4_dup_s32(_: &[i32; 4][..1] as [i32; 4]) -> int32x2x4_t;
fn vld1_dup_u32(_: &[u32; 1][..1] as u32) -> uint32x2_t;
fn vld2_dup_u32(_: &[u32; 2][..1] as [u32; 2]) -> uint32x2x2_t;
fn vld3_dup_u32(_: &[u32; 3][..1] as [u32; 3]) -> uint32x2x3_t;
fn vld4_dup_u32(_: &[u32; 4][..1] as [u32; 4]) -> uint32x2x4_t;
fn vld1_dup_f32(_: &[f32; 1][..1] as f32) -> float32x2_t;
fn vld2_dup_f32(_: &[f32; 2][..1] as [f32; 2]) -> float32x2x2_t;
fn vld3_dup_f32(_: &[f32; 3][..1] as [f32; 3]) -> float32x2x3_t;
fn vld4_dup_f32(_: &[f32; 4][..1] as [f32; 4]) -> float32x2x4_t;
fn vld1_dup_s64(_: &[i64; 1][..1] as i64) -> int64x1_t;
fn vld2_dup_s64(_: &[i64; 2][..1] as [i64; 2]) -> int64x1x2_t;
fn vld3_dup_s64(_: &[i64; 3][..1] as [i64; 3]) -> int64x1x3_t;
fn vld4_dup_s64(_: &[i64; 4][..1] as [i64; 4]) -> int64x1x4_t;
fn vld1_dup_u64(_: &[u64; 1][..1] as u64) -> uint64x1_t;
fn vld2_dup_u64(_: &[u64; 2][..1] as [u64; 2]) -> uint64x1x2_t;
fn vld3_dup_u64(_: &[u64; 3][..1] as [u64; 3]) -> uint64x1x3_t;
fn vld4_dup_u64(_: &[u64; 4][..1] as [u64; 4]) -> uint64x1x4_t;
fn vld1_dup_f64(_: &[f64; 1][..1] as f64) -> float64x1_t;
fn vld2_dup_f64(_: &[f64; 2][..1] as [f64; 2]) -> float64x1x2_t;
fn vld3_dup_f64(_: &[f64; 3][..1] as [f64; 3]) -> float64x1x3_t;
fn vld4_dup_f64(_: &[f64; 4][..1] as [f64; 4]) -> float64x1x4_t;
}
// Load wrappers for the `vld1q_dup_*` .. `vld4q_dup_*` families returning 16-byte vector
// types.
//
// As with the 8-byte `dup` table, the memory operand is one scalar (`vld1q_dup_*`) or an
// array of 2, 3 or 4 elements, declared as a single "register" (`[..1]`) whose lane count
// equals the number of elements in the reference type. `various_sizes` only performs its
// size comparison in test builds.
vld_n_replicate_k! {
unsafe: load;
size: various_sizes;
fn vld1q_dup_s8(_: &[i8; 1][..1] as i8) -> int8x16_t;
fn vld2q_dup_s8(_: &[i8; 2][..1] as [i8; 2]) -> int8x16x2_t;
fn vld3q_dup_s8(_: &[i8; 3][..1] as [i8; 3]) -> int8x16x3_t;
fn vld4q_dup_s8(_: &[i8; 4][..1] as [i8; 4]) -> int8x16x4_t;
fn vld1q_dup_u8(_: &[u8; 1][..1] as u8) -> uint8x16_t;
fn vld2q_dup_u8(_: &[u8; 2][..1] as [u8; 2]) -> uint8x16x2_t;
fn vld3q_dup_u8(_: &[u8; 3][..1] as [u8; 3]) -> uint8x16x3_t;
fn vld4q_dup_u8(_: &[u8; 4][..1] as [u8; 4]) -> uint8x16x4_t;
fn vld1q_dup_s16(_: &[i16; 1][..1] as i16) -> int16x8_t;
fn vld2q_dup_s16(_: &[i16; 2][..1] as [i16; 2]) -> int16x8x2_t;
fn vld3q_dup_s16(_: &[i16; 3][..1] as [i16; 3]) -> int16x8x3_t;
fn vld4q_dup_s16(_: &[i16; 4][..1] as [i16; 4]) -> int16x8x4_t;
fn vld1q_dup_u16(_: &[u16; 1][..1] as u16) -> uint16x8_t;
fn vld2q_dup_u16(_: &[u16; 2][..1] as [u16; 2]) -> uint16x8x2_t;
fn vld3q_dup_u16(_: &[u16; 3][..1] as [u16; 3]) -> uint16x8x3_t;
fn vld4q_dup_u16(_: &[u16; 4][..1] as [u16; 4]) -> uint16x8x4_t;
fn vld1q_dup_s32(_: &[i32; 1][..1] as i32) -> int32x4_t;
fn vld2q_dup_s32(_: &[i32; 2][..1] as [i32; 2]) -> int32x4x2_t;
fn vld3q_dup_s32(_: &[i32; 3][..1] as [i32; 3]) -> int32x4x3_t;
fn vld4q_dup_s32(_: &[i32; 4][..1] as [i32; 4]) -> int32x4x4_t;
fn vld1q_dup_u32(_: &[u32; 1][..1] as u32) -> uint32x4_t;
fn vld2q_dup_u32(_: &[u32; 2][..1] as [u32; 2]) -> uint32x4x2_t;
fn vld3q_dup_u32(_: &[u32; 3][..1] as [u32; 3]) -> uint32x4x3_t;
fn vld4q_dup_u32(_: &[u32; 4][..1] as [u32; 4]) -> uint32x4x4_t;
fn vld1q_dup_f32(_: &[f32; 1][..1] as f32) -> float32x4_t;
fn vld2q_dup_f32(_: &[f32; 2][..1] as [f32; 2]) -> float32x4x2_t;
fn vld3q_dup_f32(_: &[f32; 3][..1] as [f32; 3]) -> float32x4x3_t;
fn vld4q_dup_f32(_: &[f32; 4][..1] as [f32; 4]) -> float32x4x4_t;
fn vld1q_dup_s64(_: &[i64; 1][..1] as i64) -> int64x2_t;
fn vld2q_dup_s64(_: &[i64; 2][..1] as [i64; 2]) -> int64x2x2_t;
fn vld3q_dup_s64(_: &[i64; 3][..1] as [i64; 3]) -> int64x2x3_t;
fn vld4q_dup_s64(_: &[i64; 4][..1] as [i64; 4]) -> int64x2x4_t;
fn vld1q_dup_u64(_: &[u64; 1][..1] as u64) -> uint64x2_t;
fn vld2q_dup_u64(_: &[u64; 2][..1] as [u64; 2]) -> uint64x2x2_t;
fn vld3q_dup_u64(_: &[u64; 3][..1] as [u64; 3]) -> uint64x2x3_t;
fn vld4q_dup_u64(_: &[u64; 4][..1] as [u64; 4]) -> uint64x2x4_t;
fn vld1q_dup_f64(_: &[f64; 1][..1] as f64) -> float64x2_t;
fn vld2q_dup_f64(_: &[f64; 2][..1] as [f64; 2]) -> float64x2x2_t;
fn vld3q_dup_f64(_: &[f64; 3][..1] as [f64; 3]) -> float64x2x3_t;
fn vld4q_dup_f64(_: &[f64; 4][..1] as [f64; 4]) -> float64x2x4_t;
}
#[cfg(test)]
mod tests {
use core::arch::aarch64 as arch;
// Generates a `#[test]` for one load wrapper: the values `0, 1, 2, ...` are loaded through
// `super::$intrinsic` and the resulting vector is compared, element by element, with the
// source array.
//
// Arguments, in order: optional extra attributes, `fn <test name>`, the wrapper's name, the
// element type, the vector type the wrapper returns and, optionally, an expression that is
// called on the flat source array to convert it into the argument type the wrapper expects.
macro_rules! test_vld1_from_slice {
($(#[$attr:meta])* fn $testname:ident, $intrinsic:ident, $base:ty, $ty:ty $(, $with:expr)?) => {
#[test]
// The test only exists when `neon` is enabled at compile time.
#[cfg(target_feature = "neon")]
$(#[$attr])*
fn $testname() {
// Reinterprets the vector as `[$base; N]` (after checking that both have the same size)
// and compares it with the expected values.
fn assert_eq<const N: usize>(v: $ty, val: [$base; N]) {
assert!(core::mem::size_of::<$ty>() == core::mem::size_of::<[$base; N]>());
let v = unsafe { core::mem::transmute_copy::<$ty, [$base; N]>(&v) };
assert_eq!(v, val);
}
#[target_feature(enable = "neon")]
fn test() {
// The array length is not written out; it is inferred from how `source` is used below.
let source = core::array::from_fn(|i| i as $base);
let argument = source;
// Optional conversion from the flat array into the wrapper's argument type.
$( let argument = $with(argument);
)?
let result: $ty = super::$intrinsic(&argument);
assert_eq(result, source);
}
// `test` is a `neon` target-feature function called from a function without that
// attribute, hence the `unsafe` block; the `cfg` above guarantees the feature is enabled.
unsafe { test() }
}
};
}
// Single-register loads into 8-byte vectors. The 64-bit element wrappers take a bare scalar
// reference, so the one-element source array is unwrapped by the closure.
test_vld1_from_slice!(fn test_vld1_u8, vld1_u8, u8, arch::uint8x8_t);
test_vld1_from_slice!(fn test_vld1_i8, vld1_s8, i8, arch::int8x8_t);
test_vld1_from_slice!(fn test_vld1_u16, vld1_u16, u16, arch::uint16x4_t);
test_vld1_from_slice!(fn test_vld1_i16, vld1_s16, i16, arch::int16x4_t);
test_vld1_from_slice!(fn test_vld1_u32, vld1_u32, u32, arch::uint32x2_t);
test_vld1_from_slice!(fn test_vld1_i32, vld1_s32, i32, arch::int32x2_t);
test_vld1_from_slice!(fn test_vld1_f32, vld1_f32, f32, arch::float32x2_t);
test_vld1_from_slice!(fn test_vld1_u64, vld1_u64, u64, arch::uint64x1_t, |[val]: [_; 1]| val);
test_vld1_from_slice!(fn test_vld1_i64, vld1_s64, i64, arch::int64x1_t, |[val]: [_; 1]| val);
test_vld1_from_slice!(fn test_vld1_f64, vld1_f64, f64, arch::float64x1_t, |[val]: [_; 1]| val);
// Regroups a flat `[T; N]` into `L` rows of `M` elements each.
//
// Trailing elements that do not fill a complete row are ignored. Panics when the number of
// complete rows differs from `L`.
fn as_chunks<T: Copy, const L: usize, const N: usize, const M: usize>(
    v: [T; N],
) -> [[T; M]; L] {
    let (rows, _tail) = v.as_chunks::<M>();
    let grouped = <[[T; M]; L]>::try_from(rows);
    grouped.unwrap()
}
// Multi-register loads into 8-byte vectors. `as_chunks::<_, L, N, M>` regroups the flat
// `N`-element source into the `[[T; M]; L]` argument the wrapper takes; the 64-bit element
// wrappers take the flat array directly. The `f64` variants are ignored under Miri (the
// reason is not recorded here).
// Two registers.
test_vld1_from_slice!(fn test_vld1_u8_x2, vld1_u8_x2, u8, arch::uint8x8x2_t, as_chunks::<_, 2, 16, 8>);
test_vld1_from_slice!(fn test_vld1_i8_x2, vld1_s8_x2, i8, arch::int8x8x2_t, as_chunks::<_, 2, 16, 8>);
test_vld1_from_slice!(fn test_vld1_u16_x2, vld1_u16_x2, u16, arch::uint16x4x2_t, as_chunks::<_, 2, 8, 4>);
test_vld1_from_slice!(fn test_vld1_i16_x2, vld1_s16_x2, i16, arch::int16x4x2_t, as_chunks::<_, 2, 8, 4>);
test_vld1_from_slice!(fn test_vld1_u32_x2, vld1_u32_x2, u32, arch::uint32x2x2_t, as_chunks::<_, 2, 4, 2>);
test_vld1_from_slice!(fn test_vld1_i32_x2, vld1_s32_x2, i32, arch::int32x2x2_t, as_chunks::<_, 2, 4, 2>);
test_vld1_from_slice!(fn test_vld1_f32_x2, vld1_f32_x2, f32, arch::float32x2x2_t, as_chunks::<_, 2, 4, 2>);
test_vld1_from_slice!(fn test_vld1_u64_x2, vld1_u64_x2, u64, arch::uint64x1x2_t);
test_vld1_from_slice!(fn test_vld1_i64_x2, vld1_s64_x2, i64, arch::int64x1x2_t);
test_vld1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vld1_f64_x2, vld1_f64_x2, f64, arch::float64x1x2_t);
// Three registers.
test_vld1_from_slice!(fn test_vld1_u8_x3, vld1_u8_x3, u8, arch::uint8x8x3_t, as_chunks::<_, 3, 24, 8>);
test_vld1_from_slice!(fn test_vld1_i8_x3, vld1_s8_x3, i8, arch::int8x8x3_t, as_chunks::<_, 3, 24, 8>);
test_vld1_from_slice!(fn test_vld1_u16_x3, vld1_u16_x3, u16, arch::uint16x4x3_t, as_chunks::<_, 3, 12, 4>);
test_vld1_from_slice!(fn test_vld1_i16_x3, vld1_s16_x3, i16, arch::int16x4x3_t, as_chunks::<_, 3, 12, 4>);
test_vld1_from_slice!(fn test_vld1_u32_x3, vld1_u32_x3, u32, arch::uint32x2x3_t, as_chunks::<_, 3, 6, 2>);
test_vld1_from_slice!(fn test_vld1_i32_x3, vld1_s32_x3, i32, arch::int32x2x3_t, as_chunks::<_, 3, 6, 2>);
test_vld1_from_slice!(fn test_vld1_f32_x3, vld1_f32_x3, f32, arch::float32x2x3_t, as_chunks::<_, 3, 6, 2>);
test_vld1_from_slice!(fn test_vld1_u64_x3, vld1_u64_x3, u64, arch::uint64x1x3_t);
test_vld1_from_slice!(fn test_vld1_i64_x3, vld1_s64_x3, i64, arch::int64x1x3_t);
test_vld1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vld1_f64_x3, vld1_f64_x3, f64, arch::float64x1x3_t);
// Four registers.
test_vld1_from_slice!(fn test_vld1_u8_x4, vld1_u8_x4, u8, arch::uint8x8x4_t, as_chunks::<_, 4, 32, 8>);
test_vld1_from_slice!(fn test_vld1_i8_x4, vld1_s8_x4, i8, arch::int8x8x4_t, as_chunks::<_, 4, 32, 8>);
test_vld1_from_slice!(fn test_vld1_u16_x4, vld1_u16_x4, u16, arch::uint16x4x4_t, as_chunks::<_, 4, 16, 4>);
test_vld1_from_slice!(fn test_vld1_i16_x4, vld1_s16_x4, i16, arch::int16x4x4_t, as_chunks::<_, 4, 16, 4>);
test_vld1_from_slice!(fn test_vld1_u32_x4, vld1_u32_x4, u32, arch::uint32x2x4_t, as_chunks::<_, 4, 8, 2>);
test_vld1_from_slice!(fn test_vld1_i32_x4, vld1_s32_x4, i32, arch::int32x2x4_t, as_chunks::<_, 4, 8, 2>);
test_vld1_from_slice!(fn test_vld1_f32_x4, vld1_f32_x4, f32, arch::float32x2x4_t, as_chunks::<_, 4, 8, 2>);
test_vld1_from_slice!(fn test_vld1_u64_x4, vld1_u64_x4, u64, arch::uint64x1x4_t);
test_vld1_from_slice!(fn test_vld1_i64_x4, vld1_s64_x4, i64, arch::int64x1x4_t);
test_vld1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vld1_f64_x4, vld1_f64_x4, f64, arch::float64x1x4_t);
// Single-register loads into 16-byte vectors.
test_vld1_from_slice!(fn test_vld1q_u8, vld1q_u8, u8, arch::uint8x16_t);
test_vld1_from_slice!(fn test_vld1q_i8, vld1q_s8, i8, arch::int8x16_t);
test_vld1_from_slice!(fn test_vld1q_u16, vld1q_u16, u16, arch::uint16x8_t);
test_vld1_from_slice!(fn test_vld1q_i16, vld1q_s16, i16, arch::int16x8_t);
test_vld1_from_slice!(fn test_vld1q_u32, vld1q_u32, u32, arch::uint32x4_t);
test_vld1_from_slice!(fn test_vld1q_i32, vld1q_s32, i32, arch::int32x4_t);
test_vld1_from_slice!(fn test_vld1q_f32, vld1q_f32, f32, arch::float32x4_t);
test_vld1_from_slice!(fn test_vld1q_u64, vld1q_u64, u64, arch::uint64x2_t);
test_vld1_from_slice!(fn test_vld1q_i64, vld1q_s64, i64, arch::int64x2_t);
test_vld1_from_slice!(fn test_vld1q_f64, vld1q_f64, f64, arch::float64x2_t);
// Multi-register loads into 16-byte vectors: two registers.
test_vld1_from_slice!(fn test_vld1q_u8_x2, vld1q_u8_x2, u8, arch::uint8x16x2_t, as_chunks::<_, 2, 32, 16>);
test_vld1_from_slice!(fn test_vld1q_i8_x2, vld1q_s8_x2, i8, arch::int8x16x2_t, as_chunks::<_, 2, 32, 16>);
test_vld1_from_slice!(fn test_vld1q_u16_x2, vld1q_u16_x2, u16, arch::uint16x8x2_t, as_chunks::<_, 2, 16, 8>);
test_vld1_from_slice!(fn test_vld1q_i16_x2, vld1q_s16_x2, i16, arch::int16x8x2_t, as_chunks::<_, 2, 16, 8>);
test_vld1_from_slice!(fn test_vld1q_u32_x2, vld1q_u32_x2, u32, arch::uint32x4x2_t, as_chunks::<_, 2, 8, 4>);
test_vld1_from_slice!(fn test_vld1q_i32_x2, vld1q_s32_x2, i32, arch::int32x4x2_t, as_chunks::<_, 2, 8, 4>);
test_vld1_from_slice!(fn test_vld1q_f32_x2, vld1q_f32_x2, f32, arch::float32x4x2_t, as_chunks::<_, 2, 8, 4>);
test_vld1_from_slice!(fn test_vld1q_u64_x2, vld1q_u64_x2, u64, arch::uint64x2x2_t, as_chunks::<_, 2, 4, 2>);
test_vld1_from_slice!(fn test_vld1q_i64_x2, vld1q_s64_x2, i64, arch::int64x2x2_t, as_chunks::<_, 2, 4, 2>);
test_vld1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vld1q_f64_x2, vld1q_f64_x2, f64, arch::float64x2x2_t, as_chunks::<_, 2, 4, 2>);
// Three registers.
test_vld1_from_slice!(fn test_vld1q_u8_x3, vld1q_u8_x3, u8, arch::uint8x16x3_t,as_chunks::<_, 3, 48, 16>);
test_vld1_from_slice!(fn test_vld1q_i8_x3, vld1q_s8_x3, i8, arch::int8x16x3_t, as_chunks::<_, 3, 48, 16>);
test_vld1_from_slice!(fn test_vld1q_u16_x3, vld1q_u16_x3, u16, arch::uint16x8x3_t, as_chunks::<_, 3, 24, 8>);
test_vld1_from_slice!(fn test_vld1q_i16_x3, vld1q_s16_x3, i16, arch::int16x8x3_t, as_chunks::<_, 3, 24, 8>);
test_vld1_from_slice!(fn test_vld1q_u32_x3, vld1q_u32_x3, u32, arch::uint32x4x3_t, as_chunks::<_, 3, 12, 4>);
test_vld1_from_slice!(fn test_vld1q_i32_x3, vld1q_s32_x3, i32, arch::int32x4x3_t, as_chunks::<_, 3, 12, 4>);
test_vld1_from_slice!(fn test_vld1q_f32_x3, vld1q_f32_x3, f32, arch::float32x4x3_t, as_chunks::<_, 3, 12, 4>);
test_vld1_from_slice!(fn test_vld1q_u64_x3, vld1q_u64_x3, u64, arch::uint64x2x3_t, as_chunks::<_, 3, 6, 2>);
test_vld1_from_slice!(fn test_vld1q_i64_x3, vld1q_s64_x3, i64, arch::int64x2x3_t, as_chunks::<_, 3, 6, 2>);
test_vld1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vld1q_f64_x3, vld1q_f64_x3, f64, arch::float64x2x3_t, as_chunks::<_, 3, 6, 2>);
// Four registers.
test_vld1_from_slice!(fn test_vld1q_u8_x4, vld1q_u8_x4, u8, arch::uint8x16x4_t, as_chunks::<_, 4, 64, 16>);
test_vld1_from_slice!(fn test_vld1q_i8_x4, vld1q_s8_x4, i8, arch::int8x16x4_t, as_chunks::<_, 4, 64, 16>);
test_vld1_from_slice!(fn test_vld1q_u16_x4, vld1q_u16_x4, u16, arch::uint16x8x4_t, as_chunks::<_, 4, 32, 8>);
test_vld1_from_slice!(fn test_vld1q_i16_x4, vld1q_s16_x4, i16, arch::int16x8x4_t, as_chunks::<_, 4, 32, 8>);
test_vld1_from_slice!(fn test_vld1q_u32_x4, vld1q_u32_x4, u32, arch::uint32x4x4_t, as_chunks::<_, 4, 16, 4>);
test_vld1_from_slice!(fn test_vld1q_i32_x4, vld1q_s32_x4, i32, arch::int32x4x4_t, as_chunks::<_, 4, 16, 4>);
test_vld1_from_slice!(fn test_vld1q_f32_x4, vld1q_f32_x4, f32, arch::float32x4x4_t, as_chunks::<_, 4, 16, 4>);
test_vld1_from_slice!(fn test_vld1q_u64_x4, vld1q_u64_x4, u64, arch::uint64x2x4_t, as_chunks::<_, 4, 8, 2>);
test_vld1_from_slice!(fn test_vld1q_i64_x4, vld1q_s64_x4, i64, arch::int64x2x4_t, as_chunks::<_, 4, 8, 2>);
test_vld1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vld1q_f64_x4, vld1q_f64_x4, f64, arch::float64x2x4_t, as_chunks::<_, 4, 8, 2>);
// Generates a `#[test]` for one store wrapper: a vector holding `0, 1, 2, ...` is written
// through `super::$intrinsic` into a zero-initialised destination, which is then compared
// with the expected flat array.
//
// Arguments, in order: optional extra attributes, `fn <test name>`, the wrapper's name, the
// element type, the vector type the wrapper stores and, optionally, an expression that is
// called on the destination to convert it into a flat array before the comparison.
macro_rules! test_vst1_from_slice {
($(#[$attr:meta])* fn $testname:ident, $intrinsic:ident, $base:ty, $ty:ty $(, $with:expr)?) => {
#[test]
// The test only exists when `neon` is enabled at compile time.
#[cfg(target_feature = "neon")]
$(#[$attr])*
fn $testname() {
// Builds the vector argument by reinterpreting `[$base; N]` as `$ty` (after checking that
// both have the same size).
fn generate<const N: usize>(val: &[$base; N]) -> $ty {
assert!(core::mem::size_of::<$ty>() == core::mem::size_of::<[$base; N]>());
unsafe { core::mem::transmute_copy::<[$base; N], $ty>(val) }
}
// All-zero destination; its type is inferred from the wrapper's `&mut` parameter.
fn result_init<T>() -> T {
unsafe { core::mem::zeroed() }
}
// Forces both sides of the comparison to be arrays of the same length.
fn assert_eq<T: PartialEq + core::fmt::Debug, const N: usize>(a: &[T; N], b: &[T; N]) {
assert_eq!(a, b);
}
#[target_feature(enable = "neon")]
fn test() {
let ground_truth = core::array::from_fn(|i| i as $base);
let argument = generate(&ground_truth);
let mut result = result_init();
super::$intrinsic(&mut result, argument);
// Optional conversion of the destination into a flat array.
$( let result = $with(result);
)?
assert_eq(&result, &ground_truth);
}
// `test` is a `neon` target-feature function called from a function without that
// attribute, hence the `unsafe` block; the `cfg` above guarantees the feature is enabled.
unsafe { test() }
}
};
}
// Single-register stores from 8-byte vectors. The 64-bit element wrappers write into a bare
// scalar, which the closure wraps into a one-element array for the comparison.
test_vst1_from_slice!(fn test_vst1_u8, vst1_u8, u8, arch::uint8x8_t);
test_vst1_from_slice!(fn test_vst1_i8, vst1_s8, i8, arch::int8x8_t);
test_vst1_from_slice!(fn test_vst1_u16, vst1_u16, u16, arch::uint16x4_t);
test_vst1_from_slice!(fn test_vst1_i16, vst1_s16, i16, arch::int16x4_t);
test_vst1_from_slice!(fn test_vst1_u32, vst1_u32, u32, arch::uint32x2_t);
test_vst1_from_slice!(fn test_vst1_i32, vst1_s32, i32, arch::int32x2_t);
test_vst1_from_slice!(fn test_vst1_f32, vst1_f32, f32, arch::float32x2_t);
test_vst1_from_slice!(fn test_vst1_u64, vst1_u64, u64, arch::uint64x1_t, |val| [val]);
test_vst1_from_slice!(fn test_vst1_i64, vst1_s64, i64, arch::int64x1_t, |val| [val]);
test_vst1_from_slice!(fn test_vst1_f64, vst1_f64, f64, arch::float64x1_t, |val| [val]);
/// Joins `L` rows of `M` elements into one `N`-element array, row after row.
///
/// # Panics
///
/// Panics if `N` differs from `L * M`, because the flattened slice then has the wrong
/// length for the requested array.
fn flatten<T: Copy, const L: usize, const N: usize, const M: usize>(v: [[T; M]; L]) -> [T; N] {
    let elements: &[T] = v.as_flattened();
    let joined = <[T; N]>::try_from(elements);
    joined.unwrap()
}
// `vst1_*_x2` / `_x3` / `_x4` cases, all ignored under Miri. Where given,
// `flatten::<_, L, N, M>` joins the L stored rows of M lanes into the N-lane array that
// is compared; the 64-bit lane cases pass no reshaping step.
test_vst1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vst1_u8_x2, vst1_u8_x2, u8, arch::uint8x8x2_t, flatten::<_, 2, 16, 8>);
test_vst1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vst1_i8_x2, vst1_s8_x2, i8, arch::int8x8x2_t, flatten::<_, 2, 16, 8>);
test_vst1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vst1_u16_x2, vst1_u16_x2, u16, arch::uint16x4x2_t, flatten::<_, 2, 8, 4>);
test_vst1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vst1_i16_x2, vst1_s16_x2, i16, arch::int16x4x2_t, flatten::<_, 2, 8, 4>);
test_vst1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vst1_u32_x2, vst1_u32_x2, u32, arch::uint32x2x2_t, flatten::<_, 2, 4, 2>);
test_vst1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vst1_i32_x2, vst1_s32_x2, i32, arch::int32x2x2_t, flatten::<_, 2, 4, 2>);
test_vst1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vst1_f32_x2, vst1_f32_x2, f32, arch::float32x2x2_t, flatten::<_, 2, 4, 2>);
test_vst1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vst1_u64_x2, vst1_u64_x2, u64, arch::uint64x1x2_t);
test_vst1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vst1_i64_x2, vst1_s64_x2, i64, arch::int64x1x2_t);
test_vst1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vst1_f64_x2, vst1_f64_x2, f64, arch::float64x1x2_t);
test_vst1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vst1_u8_x3, vst1_u8_x3, u8, arch::uint8x8x3_t, flatten::<_, 3, 24, 8>);
test_vst1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vst1_i8_x3, vst1_s8_x3, i8, arch::int8x8x3_t, flatten::<_, 3, 24, 8>);
test_vst1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vst1_u16_x3, vst1_u16_x3, u16, arch::uint16x4x3_t, flatten::<_, 3, 12, 4>);
test_vst1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vst1_i16_x3, vst1_s16_x3, i16, arch::int16x4x3_t, flatten::<_, 3, 12, 4>);
test_vst1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vst1_u32_x3, vst1_u32_x3, u32, arch::uint32x2x3_t, flatten::<_, 3, 6, 2>);
test_vst1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vst1_i32_x3, vst1_s32_x3, i32, arch::int32x2x3_t, flatten::<_, 3, 6, 2>);
test_vst1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vst1_f32_x3, vst1_f32_x3, f32, arch::float32x2x3_t, flatten::<_, 3, 6, 2>);
test_vst1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vst1_u64_x3, vst1_u64_x3, u64, arch::uint64x1x3_t);
test_vst1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vst1_i64_x3, vst1_s64_x3, i64, arch::int64x1x3_t);
test_vst1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vst1_f64_x3, vst1_f64_x3, f64, arch::float64x1x3_t);
test_vst1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vst1_u8_x4, vst1_u8_x4, u8, arch::uint8x8x4_t, flatten::<_, 4, 32, 8>);
test_vst1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vst1_i8_x4, vst1_s8_x4, i8, arch::int8x8x4_t, flatten::<_, 4, 32, 8>);
test_vst1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vst1_u16_x4, vst1_u16_x4, u16, arch::uint16x4x4_t, flatten::<_, 4, 16, 4>);
test_vst1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vst1_i16_x4, vst1_s16_x4, i16, arch::int16x4x4_t, flatten::<_, 4, 16, 4>);
test_vst1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vst1_u32_x4, vst1_u32_x4, u32, arch::uint32x2x4_t, flatten::<_, 4, 8, 2>);
test_vst1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vst1_i32_x4, vst1_s32_x4, i32, arch::int32x2x4_t, flatten::<_, 4, 8, 2>);
test_vst1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vst1_f32_x4, vst1_f32_x4, f32, arch::float32x2x4_t, flatten::<_, 4, 8, 2>);
test_vst1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vst1_u64_x4, vst1_u64_x4, u64, arch::uint64x1x4_t);
test_vst1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vst1_i64_x4, vst1_s64_x4, i64, arch::int64x1x4_t);
test_vst1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vst1_f64_x4, vst1_f64_x4, f64, arch::float64x1x4_t);
// `vst1q_*` cases for a single vector; none of them passes a reshaping step.
test_vst1_from_slice!(fn test_vst1q_u8, vst1q_u8, u8, arch::uint8x16_t);
test_vst1_from_slice!(fn test_vst1q_i8, vst1q_s8, i8, arch::int8x16_t);
test_vst1_from_slice!(fn test_vst1q_u16, vst1q_u16, u16, arch::uint16x8_t);
test_vst1_from_slice!(fn test_vst1q_i16, vst1q_s16, i16, arch::int16x8_t);
test_vst1_from_slice!(fn test_vst1q_u32, vst1q_u32, u32, arch::uint32x4_t);
test_vst1_from_slice!(fn test_vst1q_i32, vst1q_s32, i32, arch::int32x4_t);
test_vst1_from_slice!(fn test_vst1q_f32, vst1q_f32, f32, arch::float32x4_t);
test_vst1_from_slice!(fn test_vst1q_u64, vst1q_u64, u64, arch::uint64x2_t);
test_vst1_from_slice!(fn test_vst1q_i64, vst1q_s64, i64, arch::int64x2_t);
test_vst1_from_slice!(fn test_vst1q_f64, vst1q_f64, f64, arch::float64x2_t);
// `vst1q_*_x2` / `_x3` / `_x4` cases, all ignored under Miri. Every case passes
// `flatten::<_, L, N, M>` to join the L stored rows of M lanes into the N-lane array
// that is compared.
test_vst1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vst1q_u8_x2, vst1q_u8_x2, u8, arch::uint8x16x2_t, flatten::<_, 2, 32, 16>);
test_vst1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vst1q_i8_x2, vst1q_s8_x2, i8, arch::int8x16x2_t, flatten::<_, 2, 32, 16>);
test_vst1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vst1q_u16_x2, vst1q_u16_x2, u16, arch::uint16x8x2_t, flatten::<_, 2, 16, 8>);
test_vst1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vst1q_i16_x2, vst1q_s16_x2, i16, arch::int16x8x2_t, flatten::<_, 2, 16, 8>);
test_vst1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vst1q_u32_x2, vst1q_u32_x2, u32, arch::uint32x4x2_t, flatten::<_, 2, 8, 4>);
test_vst1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vst1q_i32_x2, vst1q_s32_x2, i32, arch::int32x4x2_t, flatten::<_, 2, 8, 4>);
test_vst1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vst1q_f32_x2, vst1q_f32_x2, f32, arch::float32x4x2_t, flatten::<_, 2, 8, 4>);
test_vst1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vst1q_u64_x2, vst1q_u64_x2, u64, arch::uint64x2x2_t, flatten::<_, 2, 4, 2>);
test_vst1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vst1q_i64_x2, vst1q_s64_x2, i64, arch::int64x2x2_t, flatten::<_, 2, 4, 2>);
test_vst1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vst1q_f64_x2, vst1q_f64_x2, f64, arch::float64x2x2_t, flatten::<_, 2, 4, 2>);
test_vst1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vst1q_u8_x3, vst1q_u8_x3, u8, arch::uint8x16x3_t, flatten::<_, 3, 48, 16>);
test_vst1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vst1q_i8_x3, vst1q_s8_x3, i8, arch::int8x16x3_t, flatten::<_, 3, 48, 16>);
test_vst1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vst1q_u16_x3, vst1q_u16_x3, u16, arch::uint16x8x3_t, flatten::<_, 3, 24, 8>);
test_vst1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vst1q_i16_x3, vst1q_s16_x3, i16, arch::int16x8x3_t, flatten::<_, 3, 24, 8>);
test_vst1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vst1q_u32_x3, vst1q_u32_x3, u32, arch::uint32x4x3_t, flatten::<_, 3, 12, 4>);
test_vst1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vst1q_i32_x3, vst1q_s32_x3, i32, arch::int32x4x3_t, flatten::<_, 3, 12, 4>);
test_vst1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vst1q_f32_x3, vst1q_f32_x3, f32, arch::float32x4x3_t, flatten::<_, 3, 12, 4>);
test_vst1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vst1q_u64_x3, vst1q_u64_x3, u64, arch::uint64x2x3_t, flatten::<_, 3, 6, 2>);
test_vst1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vst1q_i64_x3, vst1q_s64_x3, i64, arch::int64x2x3_t, flatten::<_, 3, 6, 2>);
test_vst1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vst1q_f64_x3, vst1q_f64_x3, f64, arch::float64x2x3_t, flatten::<_, 3, 6, 2>);
test_vst1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vst1q_u8_x4, vst1q_u8_x4, u8, arch::uint8x16x4_t, flatten::<_, 4, 64, 16>);
test_vst1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vst1q_i8_x4, vst1q_s8_x4, i8, arch::int8x16x4_t, flatten::<_, 4, 64, 16>);
test_vst1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vst1q_u16_x4, vst1q_u16_x4, u16, arch::uint16x8x4_t, flatten::<_, 4, 32, 8>);
test_vst1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vst1q_i16_x4, vst1q_s16_x4, i16, arch::int16x8x4_t, flatten::<_, 4, 32, 8>);
test_vst1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vst1q_u32_x4, vst1q_u32_x4, u32, arch::uint32x4x4_t, flatten::<_, 4, 16, 4>);
test_vst1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vst1q_i32_x4, vst1q_s32_x4, i32, arch::int32x4x4_t, flatten::<_, 4, 16, 4>);
test_vst1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vst1q_f32_x4, vst1q_f32_x4, f32, arch::float32x4x4_t, flatten::<_, 4, 16, 4>);
test_vst1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vst1q_u64_x4, vst1q_u64_x4, u64, arch::uint64x2x4_t, flatten::<_, 4, 8, 2>);
test_vst1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vst1q_i64_x4, vst1q_s64_x4, i64, arch::int64x2x4_t, flatten::<_, 4, 8, 2>);
test_vst1_from_slice!(#[cfg_attr(miri, ignore)] fn test_vst1q_f64_x4, vst1q_f64_x4, f64, arch::float64x2x4_t, flatten::<_, 4, 8, 2>);
/// Generates the four "load and replicate" tests (1 through 4 values) for one lane type.
///
/// Takes optional attributes, then `fn [four test names]`, `[four wrapper names]`, the
/// lane type and `[four vector types]`. The 2-, 3- and 4-value tests are additionally
/// marked `#[cfg_attr(miri, ignore)]`.
///
/// The `@` rule is the per-test worker: it builds `$count` distinct lane values, loads
/// them through the wrapper (the `|array| expr` tail says what to pass), and checks that
/// chunk `k` of the result holds nothing but value `k`.
macro_rules! test_vldup {
    (
        $(#[$meta:meta])*
        fn [$name1:ident, $name2:ident, $name3:ident, $name4:ident],
        [$load1:ident, $load2:ident, $load3:ident, $load4:ident],
        $lane:ty,
        [$vec1:ty, $vec2:ty, $vec3:ty, $vec4:ty]
    ) => {
        // The one-value wrapper is handed a reference to a single lane; the others get
        // a reference to the whole array.
        test_vldup!(
            @$(#[$meta])*
            fn ([$name1], [$load1], $lane, [$vec1], 1) => |src| &src[0]
        );
        test_vldup!(
            @$(#[$meta])* #[cfg_attr(miri, ignore)]
            fn ([$name2], [$load2], $lane, [$vec2], 2) => |src| &src
        );
        test_vldup!(
            @$(#[$meta])* #[cfg_attr(miri, ignore)]
            fn ([$name3], [$load3], $lane, [$vec3], 3) => |src| &src
        );
        test_vldup!(
            @$(#[$meta])* #[cfg_attr(miri, ignore)]
            fn ([$name4], [$load4], $lane, [$vec4], 4) => |src| &src
        );
    };
    (
        @$(#[$meta:meta])*
        fn ([$name:ident], [$load:ident], $lane:ty, [$vector:ty], $count:expr)
        => |$values:ident| $argument:expr
    ) => {
        #[test]
        #[cfg(target_feature = "neon")]
        $(#[$meta])*
        fn $name() {
            // Splits the loaded vector into `$count` equal chunks and requires chunk `k`
            // to consist solely of `wanted[k]`.
            fn assert_replicated(loaded: $vector, wanted: [$lane; $count]) {
                const TOTAL: usize =
                    ::core::mem::size_of::<$vector>() / ::core::mem::size_of::<$lane>();
                const PER_VALUE: usize = TOTAL / $count;
                // The lanes must divide evenly among the values.
                const _: () = assert!(PER_VALUE * $count == TOTAL);
                // `transmute` itself rejects a size mismatch at compile time.
                let lanes = unsafe { ::core::mem::transmute::<$vector, [$lane; TOTAL]>(loaded) };
                for (chunk, &value) in lanes.chunks_exact(PER_VALUE).zip(&wanted) {
                    let replicated: [$lane; PER_VALUE] = [value; PER_VALUE];
                    assert_eq!(chunk, replicated);
                }
            }

            #[target_feature(enable = "neon")]
            fn run() {
                // Distinct values 0x42, 0x43, ... so that chunks cannot be confused.
                let $values: [$lane; $count] =
                    core::array::from_fn(|index| 0x42 as $lane + index as $lane);
                let loaded = super::$load($argument);
                assert_replicated(loaded, $values);
            }

            // SAFETY: this test is only compiled when `neon` is statically enabled
            // (see the `cfg` attribute above).
            unsafe { run() }
        }
    };
}
// `vld1_dup` .. `vld4_dup` tests for each lane type, using the non-`q` vector types.
test_vldup!(
fn [test_vld1_dup_s8, test_vld2_dup_s8, test_vld3_dup_s8, test_vld4_dup_s8],
[vld1_dup_s8, vld2_dup_s8, vld3_dup_s8, vld4_dup_s8],
i8,
[arch::int8x8_t, arch::int8x8x2_t, arch::int8x8x3_t, arch::int8x8x4_t]
);
test_vldup!(
fn [test_vld1_dup_u8, test_vld2_dup_u8, test_vld3_dup_u8, test_vld4_dup_u8],
[vld1_dup_u8, vld2_dup_u8, vld3_dup_u8, vld4_dup_u8],
u8,
[arch::uint8x8_t, arch::uint8x8x2_t, arch::uint8x8x3_t, arch::uint8x8x4_t]
);
test_vldup!(
fn [test_vld1_dup_s16, test_vld2_dup_s16, test_vld3_dup_s16, test_vld4_dup_s16],
[vld1_dup_s16, vld2_dup_s16, vld3_dup_s16, vld4_dup_s16],
i16,
[arch::int16x4_t, arch::int16x4x2_t, arch::int16x4x3_t, arch::int16x4x4_t]
);
test_vldup!(
fn [test_vld1_dup_u16, test_vld2_dup_u16, test_vld3_dup_u16, test_vld4_dup_u16],
[vld1_dup_u16, vld2_dup_u16, vld3_dup_u16, vld4_dup_u16],
u16,
[arch::uint16x4_t, arch::uint16x4x2_t, arch::uint16x4x3_t, arch::uint16x4x4_t]
);
test_vldup!(
fn [test_vld1_dup_s32, test_vld2_dup_s32, test_vld3_dup_s32, test_vld4_dup_s32],
[vld1_dup_s32, vld2_dup_s32, vld3_dup_s32, vld4_dup_s32],
i32,
[arch::int32x2_t, arch::int32x2x2_t, arch::int32x2x3_t, arch::int32x2x4_t]
);
test_vldup!(
fn [test_vld1_dup_u32, test_vld2_dup_u32, test_vld3_dup_u32, test_vld4_dup_u32],
[vld1_dup_u32, vld2_dup_u32, vld3_dup_u32, vld4_dup_u32],
u32,
[arch::uint32x2_t, arch::uint32x2x2_t, arch::uint32x2x3_t, arch::uint32x2x4_t]
);
test_vldup!(
fn [test_vld1_dup_f32, test_vld2_dup_f32, test_vld3_dup_f32, test_vld4_dup_f32],
[vld1_dup_f32, vld2_dup_f32, vld3_dup_f32, vld4_dup_f32],
f32,
[arch::float32x2_t, arch::float32x2x2_t, arch::float32x2x3_t, arch::float32x2x4_t]
);
test_vldup!(
fn [test_vld1_dup_s64, test_vld2_dup_s64, test_vld3_dup_s64, test_vld4_dup_s64],
[vld1_dup_s64, vld2_dup_s64, vld3_dup_s64, vld4_dup_s64],
i64,
[arch::int64x1_t, arch::int64x1x2_t, arch::int64x1x3_t, arch::int64x1x4_t]
);
test_vldup!(
fn [test_vld1_dup_u64, test_vld2_dup_u64, test_vld3_dup_u64, test_vld4_dup_u64],
[vld1_dup_u64, vld2_dup_u64, vld3_dup_u64, vld4_dup_u64],
u64,
[arch::uint64x1_t, arch::uint64x1x2_t, arch::uint64x1x3_t, arch::uint64x1x4_t]
);
test_vldup!(
fn [test_vld1_dup_f64, test_vld2_dup_f64, test_vld3_dup_f64, test_vld4_dup_f64],
[vld1_dup_f64, vld2_dup_f64, vld3_dup_f64, vld4_dup_f64],
f64,
[arch::float64x1_t, arch::float64x1x2_t, arch::float64x1x3_t, arch::float64x1x4_t]
);
// `vld1q_dup` .. `vld4q_dup` tests for each lane type, using the `q` vector types.
test_vldup!(
fn [test_vld1q_dup_s8, test_vld2q_dup_s8, test_vld3q_dup_s8, test_vld4q_dup_s8],
[vld1q_dup_s8, vld2q_dup_s8, vld3q_dup_s8, vld4q_dup_s8],
i8,
[arch::int8x16_t, arch::int8x16x2_t, arch::int8x16x3_t, arch::int8x16x4_t]
);
test_vldup!(
fn [test_vld1q_dup_u8, test_vld2q_dup_u8, test_vld3q_dup_u8, test_vld4q_dup_u8],
[vld1q_dup_u8, vld2q_dup_u8, vld3q_dup_u8, vld4q_dup_u8],
u8,
[arch::uint8x16_t, arch::uint8x16x2_t, arch::uint8x16x3_t, arch::uint8x16x4_t]
);
test_vldup!(
fn [test_vld1q_dup_s16, test_vld2q_dup_s16, test_vld3q_dup_s16, test_vld4q_dup_s16],
[vld1q_dup_s16, vld2q_dup_s16, vld3q_dup_s16, vld4q_dup_s16],
i16,
[arch::int16x8_t, arch::int16x8x2_t, arch::int16x8x3_t, arch::int16x8x4_t]
);
test_vldup!(
fn [test_vld1q_dup_u16, test_vld2q_dup_u16, test_vld3q_dup_u16, test_vld4q_dup_u16],
[vld1q_dup_u16, vld2q_dup_u16, vld3q_dup_u16, vld4q_dup_u16],
u16,
[arch::uint16x8_t, arch::uint16x8x2_t, arch::uint16x8x3_t, arch::uint16x8x4_t]
);
test_vldup!(
fn [test_vld1q_dup_s32, test_vld2q_dup_s32, test_vld3q_dup_s32, test_vld4q_dup_s32],
[vld1q_dup_s32, vld2q_dup_s32, vld3q_dup_s32, vld4q_dup_s32],
i32,
[arch::int32x4_t, arch::int32x4x2_t, arch::int32x4x3_t, arch::int32x4x4_t]
);
test_vldup!(
fn [test_vld1q_dup_u32, test_vld2q_dup_u32, test_vld3q_dup_u32, test_vld4q_dup_u32],
[vld1q_dup_u32, vld2q_dup_u32, vld3q_dup_u32, vld4q_dup_u32],
u32,
[arch::uint32x4_t, arch::uint32x4x2_t, arch::uint32x4x3_t, arch::uint32x4x4_t]
);
test_vldup!(
fn [test_vld1q_dup_f32, test_vld2q_dup_f32, test_vld3q_dup_f32, test_vld4q_dup_f32],
[vld1q_dup_f32, vld2q_dup_f32, vld3q_dup_f32, vld4q_dup_f32],
f32,
[arch::float32x4_t, arch::float32x4x2_t, arch::float32x4x3_t, arch::float32x4x4_t]
);
test_vldup!(
fn [test_vld1q_dup_s64, test_vld2q_dup_s64, test_vld3q_dup_s64, test_vld4q_dup_s64],
[vld1q_dup_s64, vld2q_dup_s64, vld3q_dup_s64, vld4q_dup_s64],
i64,
[arch::int64x2_t, arch::int64x2x2_t, arch::int64x2x3_t, arch::int64x2x4_t]
);
test_vldup!(
fn [test_vld1q_dup_u64, test_vld2q_dup_u64, test_vld3q_dup_u64, test_vld4q_dup_u64],
[vld1q_dup_u64, vld2q_dup_u64, vld3q_dup_u64, vld4q_dup_u64],
u64,
[arch::uint64x2_t, arch::uint64x2x2_t, arch::uint64x2x3_t, arch::uint64x2x4_t]
);
test_vldup!(
fn [test_vld1q_dup_f64, test_vld2q_dup_f64, test_vld3q_dup_f64, test_vld4q_dup_f64],
[vld1q_dup_f64, vld2q_dup_f64, vld3q_dup_f64, vld4q_dup_f64],
f64,
[arch::float64x2_t, arch::float64x2x2_t, arch::float64x2x3_t, arch::float64x2x4_t]
);
/// Generates a `#[test]` for a de-interleaving multi-register load wrapper from the
/// parent module.
///
/// Arguments, in order: optional attributes, `fn <test name>`, the wrapper's name, the
/// lane type, the multi-register vector type, the lanes per register and the number of
/// registers.
///
/// The source array holds `0, 1, 2, ...`; lane `l` of register `r` is expected to receive
/// source element `l * REGS + r`.
macro_rules! test_vldNq_deinterleave {
    ($(#[$meta:meta])* fn $name:ident, $load:ident, $lane:ty, $vector:ty, $lanes:expr, $regs:expr) => {
        #[test]
        #[cfg(target_feature = "neon")]
        $(#[$meta])*
        fn $name() {
            // Views the loaded registers as nested arrays and compares them with `wanted`.
            fn check_registers<const LANES: usize, const REGS: usize>(
                loaded: $vector,
                wanted: [[$lane; LANES]; REGS],
            ) {
                assert!(
                    core::mem::size_of::<$vector>()
                        == core::mem::size_of::<[[$lane; LANES]; REGS]>()
                );
                // SAFETY: the size check above keeps the copy within `loaded`.
                let loaded = unsafe {
                    core::mem::transmute_copy::<$vector, [[$lane; LANES]; REGS]>(&loaded)
                };
                assert_eq!(loaded, wanted);
            }

            #[target_feature(enable = "neon")]
            fn run() {
                const LANES: usize = $lanes;
                const REGS: usize = $regs;
                const TOTAL: usize = LANES * REGS;
                let memory: [$lane; TOTAL] = core::array::from_fn(|index| index as $lane);
                // `memory[i]` is `i`, so indexing it yields the expected lane value.
                let wanted: [[$lane; LANES]; REGS] = core::array::from_fn(|reg| {
                    core::array::from_fn(|lane| memory[lane * REGS + reg])
                });
                let loaded: $vector = super::$load(&memory);
                check_registers::<LANES, REGS>(loaded, wanted);
            }

            // SAFETY: this test is only compiled when `neon` is statically enabled
            // (see the `cfg` attribute above).
            unsafe { run() }
        }
    };
}
// De-interleaving load cases for `vld2q`, `vld3q` and `vld4q` over every lane type. The
// last two arguments are lanes per register and register count. All are ignored under Miri.
test_vldNq_deinterleave!(#[cfg_attr(miri, ignore)] fn test_vld2q_u8, vld2q_u8, u8, arch::uint8x16x2_t, 16, 2);
test_vldNq_deinterleave!(#[cfg_attr(miri, ignore)] fn test_vld2q_s8, vld2q_s8, i8, arch::int8x16x2_t, 16, 2);
test_vldNq_deinterleave!(#[cfg_attr(miri, ignore)] fn test_vld2q_u16, vld2q_u16, u16, arch::uint16x8x2_t, 8, 2);
test_vldNq_deinterleave!(#[cfg_attr(miri, ignore)] fn test_vld2q_s16, vld2q_s16, i16, arch::int16x8x2_t, 8, 2);
test_vldNq_deinterleave!(#[cfg_attr(miri, ignore)] fn test_vld2q_u32, vld2q_u32, u32, arch::uint32x4x2_t, 4, 2);
test_vldNq_deinterleave!(#[cfg_attr(miri, ignore)] fn test_vld2q_s32, vld2q_s32, i32, arch::int32x4x2_t, 4, 2);
test_vldNq_deinterleave!(#[cfg_attr(miri, ignore)] fn test_vld2q_f32, vld2q_f32, f32, arch::float32x4x2_t, 4, 2);
test_vldNq_deinterleave!(#[cfg_attr(miri, ignore)] fn test_vld2q_u64, vld2q_u64, u64, arch::uint64x2x2_t, 2, 2);
test_vldNq_deinterleave!(#[cfg_attr(miri, ignore)] fn test_vld2q_s64, vld2q_s64, i64, arch::int64x2x2_t, 2, 2);
test_vldNq_deinterleave!(#[cfg_attr(miri, ignore)] fn test_vld2q_f64, vld2q_f64, f64, arch::float64x2x2_t, 2, 2);
test_vldNq_deinterleave!(#[cfg_attr(miri, ignore)] fn test_vld3q_u8, vld3q_u8, u8, arch::uint8x16x3_t, 16, 3);
test_vldNq_deinterleave!(#[cfg_attr(miri, ignore)] fn test_vld3q_s8, vld3q_s8, i8, arch::int8x16x3_t, 16, 3);
test_vldNq_deinterleave!(#[cfg_attr(miri, ignore)] fn test_vld3q_u16, vld3q_u16, u16, arch::uint16x8x3_t, 8, 3);
test_vldNq_deinterleave!(#[cfg_attr(miri, ignore)] fn test_vld3q_s16, vld3q_s16, i16, arch::int16x8x3_t, 8, 3);
test_vldNq_deinterleave!(#[cfg_attr(miri, ignore)] fn test_vld3q_u32, vld3q_u32, u32, arch::uint32x4x3_t, 4, 3);
test_vldNq_deinterleave!(#[cfg_attr(miri, ignore)] fn test_vld3q_s32, vld3q_s32, i32, arch::int32x4x3_t, 4, 3);
test_vldNq_deinterleave!(#[cfg_attr(miri, ignore)] fn test_vld3q_f32, vld3q_f32, f32, arch::float32x4x3_t, 4, 3);
test_vldNq_deinterleave!(#[cfg_attr(miri, ignore)] fn test_vld3q_u64, vld3q_u64, u64, arch::uint64x2x3_t, 2, 3);
test_vldNq_deinterleave!(#[cfg_attr(miri, ignore)] fn test_vld3q_s64, vld3q_s64, i64, arch::int64x2x3_t, 2, 3);
test_vldNq_deinterleave!(#[cfg_attr(miri, ignore)] fn test_vld3q_f64, vld3q_f64, f64, arch::float64x2x3_t, 2, 3);
test_vldNq_deinterleave!(#[cfg_attr(miri, ignore)] fn test_vld4q_u8, vld4q_u8, u8, arch::uint8x16x4_t, 16, 4);
test_vldNq_deinterleave!(#[cfg_attr(miri, ignore)] fn test_vld4q_s8, vld4q_s8, i8, arch::int8x16x4_t, 16, 4);
test_vldNq_deinterleave!(#[cfg_attr(miri, ignore)] fn test_vld4q_u16, vld4q_u16, u16, arch::uint16x8x4_t, 8, 4);
test_vldNq_deinterleave!(#[cfg_attr(miri, ignore)] fn test_vld4q_s16, vld4q_s16, i16, arch::int16x8x4_t, 8, 4);
test_vldNq_deinterleave!(#[cfg_attr(miri, ignore)] fn test_vld4q_u32, vld4q_u32, u32, arch::uint32x4x4_t, 4, 4);
test_vldNq_deinterleave!(#[cfg_attr(miri, ignore)] fn test_vld4q_s32, vld4q_s32, i32, arch::int32x4x4_t, 4, 4);
test_vldNq_deinterleave!(#[cfg_attr(miri, ignore)] fn test_vld4q_f32, vld4q_f32, f32, arch::float32x4x4_t, 4, 4);
test_vldNq_deinterleave!(#[cfg_attr(miri, ignore)] fn test_vld4q_u64, vld4q_u64, u64, arch::uint64x2x4_t, 2, 4);
test_vldNq_deinterleave!(#[cfg_attr(miri, ignore)] fn test_vld4q_s64, vld4q_s64, i64, arch::int64x2x4_t, 2, 4);
test_vldNq_deinterleave!(#[cfg_attr(miri, ignore)] fn test_vld4q_f64, vld4q_f64, f64, arch::float64x2x4_t, 2, 4);
/// Generates a `#[test]` for an interleaving multi-register store wrapper from the
/// parent module.
///
/// Arguments, in order: optional attributes, `fn <test name>`, the wrapper's name, the
/// lane type, the multi-register vector type, the lanes per register and the number of
/// registers.
///
/// Lane `l` of register `r` is filled with the value `l * REGS + r`, so the stored output
/// is expected to read `0, 1, 2, ...`.
macro_rules! test_vstNq_interleave {
    ($(#[$meta:meta])* fn $name:ident, $store:ident, $lane:ty, $vector:ty, $lanes:expr, $regs:expr) => {
        #[test]
        #[cfg(target_feature = "neon")]
        $(#[$meta])*
        fn $name() {
            // Reinterprets per-register lane arrays as the vector type handed to the wrapper.
            fn pack_registers<const LANES: usize, const REGS: usize>(
                rows: [[$lane; LANES]; REGS],
            ) -> $vector {
                assert!(
                    core::mem::size_of::<$vector>()
                        == core::mem::size_of::<[[$lane; LANES]; REGS]>()
                );
                // SAFETY: the size check above keeps the copy within `rows`; the
                // destination is expected to be a plain NEON vector type.
                unsafe { core::mem::transmute_copy::<[[$lane; LANES]; REGS], $vector>(&rows) }
            }

            #[target_feature(enable = "neon")]
            fn run() {
                const LANES: usize = $lanes;
                const REGS: usize = $regs;
                const TOTAL: usize = LANES * REGS;
                let wanted: [$lane; TOTAL] = core::array::from_fn(|index| index as $lane);
                // `wanted[i]` is `i`, so indexing it yields the lane value to store.
                let rows: [[$lane; LANES]; REGS] = core::array::from_fn(|reg| {
                    core::array::from_fn(|lane| wanted[lane * REGS + reg])
                });
                let mut stored: [$lane; TOTAL] = [0 as $lane; TOTAL];
                super::$store(&mut stored, pack_registers::<LANES, REGS>(rows));
                assert_eq!(stored, wanted);
            }

            // SAFETY: this test is only compiled when `neon` is statically enabled
            // (see the `cfg` attribute above).
            unsafe { run() }
        }
    };
}
// Interleaving store cases for `vst2q`, `vst3q` and `vst4q` over every lane type. The
// last two arguments are lanes per register and register count. All are ignored under Miri.
test_vstNq_interleave!(#[cfg_attr(miri, ignore)] fn test_vst2q_u8, vst2q_u8, u8, arch::uint8x16x2_t, 16, 2);
test_vstNq_interleave!(#[cfg_attr(miri, ignore)] fn test_vst2q_s8, vst2q_s8, i8, arch::int8x16x2_t, 16, 2);
test_vstNq_interleave!(#[cfg_attr(miri, ignore)] fn test_vst2q_u16, vst2q_u16, u16, arch::uint16x8x2_t, 8, 2);
test_vstNq_interleave!(#[cfg_attr(miri, ignore)] fn test_vst2q_s16, vst2q_s16, i16, arch::int16x8x2_t, 8, 2);
test_vstNq_interleave!(#[cfg_attr(miri, ignore)] fn test_vst2q_u32, vst2q_u32, u32, arch::uint32x4x2_t, 4, 2);
test_vstNq_interleave!(#[cfg_attr(miri, ignore)] fn test_vst2q_s32, vst2q_s32, i32, arch::int32x4x2_t, 4, 2);
test_vstNq_interleave!(#[cfg_attr(miri, ignore)] fn test_vst2q_f32, vst2q_f32, f32, arch::float32x4x2_t, 4, 2);
test_vstNq_interleave!(#[cfg_attr(miri, ignore)] fn test_vst2q_u64, vst2q_u64, u64, arch::uint64x2x2_t, 2, 2);
test_vstNq_interleave!(#[cfg_attr(miri, ignore)] fn test_vst2q_s64, vst2q_s64, i64, arch::int64x2x2_t, 2, 2);
test_vstNq_interleave!(#[cfg_attr(miri, ignore)] fn test_vst2q_f64, vst2q_f64, f64, arch::float64x2x2_t, 2, 2);
test_vstNq_interleave!(#[cfg_attr(miri, ignore)] fn test_vst3q_u8, vst3q_u8, u8, arch::uint8x16x3_t, 16, 3);
test_vstNq_interleave!(#[cfg_attr(miri, ignore)] fn test_vst3q_s8, vst3q_s8, i8, arch::int8x16x3_t, 16, 3);
test_vstNq_interleave!(#[cfg_attr(miri, ignore)] fn test_vst3q_u16, vst3q_u16, u16, arch::uint16x8x3_t, 8, 3);
test_vstNq_interleave!(#[cfg_attr(miri, ignore)] fn test_vst3q_s16, vst3q_s16, i16, arch::int16x8x3_t, 8, 3);
test_vstNq_interleave!(#[cfg_attr(miri, ignore)] fn test_vst3q_u32, vst3q_u32, u32, arch::uint32x4x3_t, 4, 3);
test_vstNq_interleave!(#[cfg_attr(miri, ignore)] fn test_vst3q_s32, vst3q_s32, i32, arch::int32x4x3_t, 4, 3);
test_vstNq_interleave!(#[cfg_attr(miri, ignore)] fn test_vst3q_f32, vst3q_f32, f32, arch::float32x4x3_t, 4, 3);
test_vstNq_interleave!(#[cfg_attr(miri, ignore)] fn test_vst3q_u64, vst3q_u64, u64, arch::uint64x2x3_t, 2, 3);
test_vstNq_interleave!(#[cfg_attr(miri, ignore)] fn test_vst3q_s64, vst3q_s64, i64, arch::int64x2x3_t, 2, 3);
test_vstNq_interleave!(#[cfg_attr(miri, ignore)] fn test_vst3q_f64, vst3q_f64, f64, arch::float64x2x3_t, 2, 3);
test_vstNq_interleave!(#[cfg_attr(miri, ignore)] fn test_vst4q_u8, vst4q_u8, u8, arch::uint8x16x4_t, 16, 4);
test_vstNq_interleave!(#[cfg_attr(miri, ignore)] fn test_vst4q_s8, vst4q_s8, i8, arch::int8x16x4_t, 16, 4);
test_vstNq_interleave!(#[cfg_attr(miri, ignore)] fn test_vst4q_u16, vst4q_u16, u16, arch::uint16x8x4_t, 8, 4);
test_vstNq_interleave!(#[cfg_attr(miri, ignore)] fn test_vst4q_s16, vst4q_s16, i16, arch::int16x8x4_t, 8, 4);
test_vstNq_interleave!(#[cfg_attr(miri, ignore)] fn test_vst4q_u32, vst4q_u32, u32, arch::uint32x4x4_t, 4, 4);
test_vstNq_interleave!(#[cfg_attr(miri, ignore)] fn test_vst4q_s32, vst4q_s32, i32, arch::int32x4x4_t, 4, 4);
test_vstNq_interleave!(#[cfg_attr(miri, ignore)] fn test_vst4q_f32, vst4q_f32, f32, arch::float32x4x4_t, 4, 4);
test_vstNq_interleave!(#[cfg_attr(miri, ignore)] fn test_vst4q_u64, vst4q_u64, u64, arch::uint64x2x4_t, 2, 4);
test_vstNq_interleave!(#[cfg_attr(miri, ignore)] fn test_vst4q_s64, vst4q_s64, i64, arch::int64x2x4_t, 2, 4);
test_vstNq_interleave!(#[cfg_attr(miri, ignore)] fn test_vst4q_f64, vst4q_f64, f64, arch::float64x2x4_t, 2, 4);
}