use std::arch::aarch64::{self, float32x2_t, float32x4_t, int32x2_t, int32x4_t};
use std::arch::aarch64::{uint32x2_t, uint32x4_t};
use std::f32;
use std::fmt::{self, Debug, Formatter};
use std::intrinsics::simd::*;
use std::mem;
use std::ops::{Add, BitAnd, BitOr, Div, Index, IndexMut, Mul, Not, Shr, Sub};
mod swizzle_f32x4;
mod swizzle_i32x4;
/// Generic fixed-length vector with SIMD representation.
///
/// In this file it is only used by the `simd_shuffle2!` / `simd_shuffle4!`
/// macros to wrap the constant lane-index arrays passed to `simd_shuffle`.
#[repr(simd)]
pub(crate) struct Simd<T, const N: usize>([T; N]);
// Shuffles lanes of `$x` and `$y` into a two-lane result.
//
// `$idx` is an array expression of two `u32` lane indices; it is wrapped in a
// `Simd<u32, 2>` constant and handed to the `simd_shuffle` intrinsic.
macro_rules! simd_shuffle2 {
// Arm for index expressions that mention const generic parameters: the
// parameters are re-declared on a helper struct so the indices can live in an
// associated constant.
($x:expr, $y:expr, <$(const $imm:ident : $ty:ty),+> $idx:expr $(,)?) => {{
struct ConstParam<$(const $imm: $ty),+>;
impl<$(const $imm: $ty),+> ConstParam<$($imm),+> {
const IDX: Simd<u32, 2> = Simd($idx);
}
simd_shuffle($x, $y, ConstParam::<$($imm),+>::IDX)
}};
// Arm for index expressions that are plain constants.
($x:expr, $y:expr, $idx:expr $(,)?) => {{
const IDX: Simd<u32, 2> = Simd($idx);
simd_shuffle($x, $y, IDX)
}};
}
// Shuffles lanes of `$x` and `$y` into a four-lane result.
//
// `$idx` is an array expression of four `u32` lane indices; it is wrapped in a
// `Simd<u32, 4>` constant and handed to the `simd_shuffle` intrinsic.
macro_rules! simd_shuffle4 {
    // Arm for index expressions that mention const generic parameters: the
    // parameters are re-declared on a helper struct so the indices can live in
    // an associated constant.
    ($x:expr, $y:expr, <$(const $imm:ident : $ty:ty),+> $idx:expr $(,)?) => {{
        struct ConstParam<$(const $imm: $ty),+>;
        impl<$(const $imm: $ty),+> ConstParam<$($imm),+> {
            // Generic arguments are comma-separated (`Simd<u32, 4>`); a `;`
            // here would make every expansion of this arm a syntax error.
            const IDX: Simd<u32, 4> = Simd($idx);
        }
        simd_shuffle($x, $y, ConstParam::<$($imm),+>::IDX)
    }};
    // Arm for index expressions that are plain constants.
    ($x:expr, $y:expr, $idx:expr $(,)?) => {{
        const IDX: Simd<u32, 4> = Simd($idx);
        simd_shuffle($x, $y, IDX)
    }};
}
/// Two `f32` lanes stored in a NEON `float32x2_t`.
#[derive(Clone, Copy)]
pub struct F32x2(pub float32x2_t);
impl F32x2 {
#[inline]
pub fn new(a: f32, b: f32) -> F32x2 {
unsafe { F32x2(mem::transmute([a, b])) }
}
#[inline]
pub fn splat(x: f32) -> F32x2 {
F32x2::new(x, x)
}
#[inline]
pub fn approx_recip(self) -> F32x2 {
unsafe { F32x2(vrecpe_v2f32(self.0)) }
}
#[inline]
pub fn min(self, other: F32x2) -> F32x2 {
unsafe { F32x2(simd_minimum_number_nsz(self.0, other.0)) }
}
#[inline]
pub fn max(self, other: F32x2) -> F32x2 {
unsafe { F32x2(simd_maximum_number_nsz(self.0, other.0)) }
}
#[inline]
pub fn clamp(self, min: F32x2, max: F32x2) -> F32x2 {
self.max(min).min(max)
}
#[inline]
pub fn abs(self) -> F32x2 {
unsafe { F32x2(fabs_v2f32(self.0)) }
}
#[inline]
pub fn floor(self) -> F32x2 {
unsafe { F32x2(floor_v2f32(self.0)) }
}
#[inline]
pub fn ceil(self) -> F32x2 {
unsafe { F32x2(ceil_v2f32(self.0)) }
}
#[inline]
pub fn sqrt(self) -> F32x2 {
unsafe { F32x2(sqrt_v2f32(self.0)) }
}
#[inline]
pub fn packed_eq(self, other: F32x2) -> U32x2 {
unsafe { U32x2(simd_eq(self.0, other.0)) }
}
#[inline]
pub fn packed_gt(self, other: F32x2) -> U32x2 {
unsafe { U32x2(simd_gt(self.0, other.0)) }
}
#[inline]
pub fn packed_lt(self, other: F32x2) -> U32x2 {
unsafe { U32x2(simd_lt(self.0, other.0)) }
}
#[inline]
pub fn packed_le(self, other: F32x2) -> U32x2 {
unsafe { U32x2(simd_le(self.0, other.0)) }
}
#[inline]
pub fn to_f32x4(self) -> F32x4 {
self.concat_xy_xy(F32x2::default())
}
#[inline]
pub fn to_i32x2(self) -> I32x2 {
unsafe { I32x2(simd_cast(round_v2f32(self.0))) }
}
#[inline]
pub fn to_i32x4(self) -> I32x4 {
self.to_i32x2().concat_xy_xy(I32x2::default())
}
#[inline]
pub fn yx(self) -> F32x2 {
unsafe { F32x2(simd_shuffle2!(self.0, self.0, [1, 0])) }
}
#[inline]
pub fn concat_xy_xy(self, other: F32x2) -> F32x4 {
unsafe { F32x4(simd_shuffle4!(self.0, other.0, [0, 1, 2, 3])) }
}
}
impl Default for F32x2 {
#[inline]
fn default() -> F32x2 {
F32x2::new(0.0, 0.0)
}
}
impl Debug for F32x2 {
    /// Formats the vector as `<lane0, lane1>`.
    #[inline]
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        let (x, y) = (self[0], self[1]);
        write!(f, "<{}, {}>", x, y)
    }
}
impl Index<usize> for F32x2 {
    type Output = f32;

    /// Borrows lane `index`.
    ///
    /// # Panics
    ///
    /// Panics if `index` is not less than 2.
    #[inline]
    fn index(&self, index: usize) -> &f32 {
        assert!(index < 2);
        let base = &self.0 as *const float32x2_t as *const f32;
        // SAFETY: the assert above keeps the offset inside the two-lane vector.
        unsafe { &*base.add(index) }
    }
}
impl IndexMut<usize> for F32x2 {
    /// Mutably borrows lane `index`.
    ///
    /// # Panics
    ///
    /// Panics if `index` is not less than 2.
    #[inline]
    fn index_mut(&mut self, index: usize) -> &mut f32 {
        assert!(index < 2);
        let base = &mut self.0 as *mut float32x2_t as *mut f32;
        // SAFETY: the assert above keeps the offset inside the two-lane vector.
        unsafe { &mut *base.add(index) }
    }
}
impl Add<F32x2> for F32x2 {
type Output = F32x2;
#[inline]
fn add(self, other: F32x2) -> F32x2 {
unsafe { F32x2(simd_add(self.0, other.0)) }
}
}
impl Div<F32x2> for F32x2 {
type Output = F32x2;
#[inline]
fn div(self, other: F32x2) -> F32x2 {
unsafe { F32x2(simd_div(self.0, other.0)) }
}
}
impl Mul<F32x2> for F32x2 {
type Output = F32x2;
#[inline]
fn mul(self, other: F32x2) -> F32x2 {
unsafe { F32x2(simd_mul(self.0, other.0)) }
}
}
impl Sub<F32x2> for F32x2 {
type Output = F32x2;
#[inline]
fn sub(self, other: F32x2) -> F32x2 {
unsafe { F32x2(simd_sub(self.0, other.0)) }
}
}
impl PartialEq for F32x2 {
#[inline]
fn eq(&self, other: &F32x2) -> bool {
self.packed_eq(*other).all_true()
}
}
/// Four `f32` lanes stored in a NEON `float32x4_t`.
#[derive(Clone, Copy)]
pub struct F32x4(pub float32x4_t);
impl F32x4 {
#[inline]
pub fn new(a: f32, b: f32, c: f32, d: f32) -> F32x4 {
unsafe { F32x4(mem::transmute([a, b, c, d])) }
}
#[inline]
pub fn splat(x: f32) -> F32x4 {
F32x4::new(x, x, x, x)
}
#[inline]
pub fn approx_recip(self) -> F32x4 {
unsafe { F32x4(vrecpe_v4f32(self.0)) }
}
#[inline]
pub fn min(self, other: F32x4) -> F32x4 {
unsafe { F32x4(simd_minimum_number_nsz(self.0, other.0)) }
}
#[inline]
pub fn max(self, other: F32x4) -> F32x4 {
unsafe { F32x4(simd_maximum_number_nsz(self.0, other.0)) }
}
#[inline]
pub fn clamp(self, min: F32x4, max: F32x4) -> F32x4 {
self.max(min).min(max)
}
#[inline]
pub fn abs(self) -> F32x4 {
unsafe { F32x4(fabs_v4f32(self.0)) }
}
#[inline]
pub fn floor(self) -> F32x4 {
unsafe { F32x4(floor_v4f32(self.0)) }
}
#[inline]
pub fn ceil(self) -> F32x4 {
unsafe { F32x4(ceil_v4f32(self.0)) }
}
#[inline]
pub fn sqrt(self) -> F32x4 {
unsafe { F32x4(sqrt_v4f32(self.0)) }
}
#[inline]
pub fn packed_eq(self, other: F32x4) -> U32x4 {
unsafe { U32x4(simd_eq(self.0, other.0)) }
}
#[inline]
pub fn packed_gt(self, other: F32x4) -> U32x4 {
unsafe { U32x4(simd_gt(self.0, other.0)) }
}
#[inline]
pub fn packed_le(self, other: F32x4) -> U32x4 {
unsafe { U32x4(simd_le(self.0, other.0)) }
}
#[inline]
pub fn packed_lt(self, other: F32x4) -> U32x4 {
unsafe { U32x4(simd_lt(self.0, other.0)) }
}
#[inline]
pub fn xy(self) -> F32x2 {
unsafe { F32x2(simd_shuffle2!(self.0, self.0, [0, 1])) }
}
#[inline]
pub fn yx(self) -> F32x2 {
unsafe { F32x2(simd_shuffle2!(self.0, self.0, [1, 0])) }
}
#[inline]
pub fn xw(self) -> F32x2 {
unsafe { F32x2(simd_shuffle2!(self.0, self.0, [0, 3])) }
}
#[inline]
pub fn zy(self) -> F32x2 {
unsafe { F32x2(simd_shuffle2!(self.0, self.0, [2, 1])) }
}
#[inline]
pub fn zw(self) -> F32x2 {
unsafe { F32x2(simd_shuffle2!(self.0, self.0, [2, 3])) }
}
#[inline]
pub fn concat_xy_xy(self, other: F32x4) -> F32x4 {
unsafe { F32x4(simd_shuffle4!(self.0, other.0, [0, 1, 4, 5])) }
}
#[inline]
pub fn concat_xy_zw(self, other: F32x4) -> F32x4 {
unsafe { F32x4(simd_shuffle4!(self.0, other.0, [0, 1, 6, 7])) }
}
#[inline]
pub fn concat_zw_zw(self, other: F32x4) -> F32x4 {
unsafe { F32x4(simd_shuffle4!(self.0, other.0, [2, 3, 6, 7])) }
}
#[inline]
pub fn concat_wz_yx(self, other: F32x4) -> F32x4 {
unsafe { F32x4(simd_shuffle4!(self.0, other.0, [3, 2, 5, 4])) }
}
#[inline]
pub fn to_i32x4(self) -> I32x4 {
unsafe { I32x4(simd_cast(round_v4f32(self.0))) }
}
}
impl Default for F32x4 {
#[inline]
fn default() -> F32x4 {
F32x4::new(0.0, 0.0, 0.0, 0.0)
}
}
impl Index<usize> for F32x4 {
    type Output = f32;

    /// Borrows lane `index`.
    ///
    /// # Panics
    ///
    /// Panics if `index` is not less than 4.
    #[inline]
    fn index(&self, index: usize) -> &f32 {
        assert!(index < 4);
        let base = &self.0 as *const float32x4_t as *const f32;
        // SAFETY: the assert above keeps the offset inside the four-lane vector.
        unsafe { &*base.add(index) }
    }
}
impl IndexMut<usize> for F32x4 {
    /// Mutably borrows lane `index`.
    ///
    /// # Panics
    ///
    /// Panics if `index` is not less than 4.
    #[inline]
    fn index_mut(&mut self, index: usize) -> &mut f32 {
        assert!(index < 4);
        let base = &mut self.0 as *mut float32x4_t as *mut f32;
        // SAFETY: the assert above keeps the offset inside the four-lane vector.
        unsafe { &mut *base.add(index) }
    }
}
impl Debug for F32x4 {
    /// Formats the vector as `<lane0, lane1, lane2, lane3>`.
    #[inline]
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        let (x, y, z, w) = (self[0], self[1], self[2], self[3]);
        write!(f, "<{}, {}, {}, {}>", x, y, z, w)
    }
}
impl PartialEq for F32x4 {
#[inline]
fn eq(&self, other: &F32x4) -> bool {
self.packed_eq(*other).all_true()
}
}
impl Add<F32x4> for F32x4 {
type Output = F32x4;
#[inline]
fn add(self, other: F32x4) -> F32x4 {
unsafe { F32x4(simd_add(self.0, other.0)) }
}
}
impl Div<F32x4> for F32x4 {
type Output = F32x4;
#[inline]
fn div(self, other: F32x4) -> F32x4 {
unsafe { F32x4(simd_div(self.0, other.0)) }
}
}
impl Mul<F32x4> for F32x4 {
type Output = F32x4;
#[inline]
fn mul(self, other: F32x4) -> F32x4 {
unsafe { F32x4(simd_mul(self.0, other.0)) }
}
}
impl Sub<F32x4> for F32x4 {
type Output = F32x4;
#[inline]
fn sub(self, other: F32x4) -> F32x4 {
unsafe { F32x4(simd_sub(self.0, other.0)) }
}
}
/// Two `i32` lanes stored in a NEON `int32x2_t`.
#[derive(Clone, Copy, Debug)]
pub struct I32x2(pub int32x2_t);
/// Inherent lane-wise operations for `I32x2`.
impl I32x2 {
    /// Creates a vector with lane 0 set to `x` and lane 1 set to `y`.
    #[inline]
    pub fn new(x: i32, y: i32) -> I32x2 {
        // Reinterpret the two-element array as the NEON vector type.
        unsafe { I32x2(mem::transmute([x, y])) }
    }

    /// Creates a vector with `x` in both lanes.
    #[inline]
    pub fn splat(x: i32) -> I32x2 {
        I32x2::new(x, x)
    }

    /// Returns lane 0.
    #[inline]
    pub fn x(self) -> i32 {
        self[0]
    }

    /// Returns lane 1.
    #[inline]
    pub fn y(self) -> i32 {
        self[1]
    }

    /// Per-lane `==` comparison; returns the mask produced by `simd_eq`.
    #[inline]
    pub fn packed_eq(self, other: I32x2) -> U32x2 {
        unsafe { U32x2(simd_eq(self.0, other.0)) }
    }

    /// Per-lane signed maximum.
    #[inline]
    pub fn max(self, other: I32x2) -> I32x2 {
        // Compare as integers with the NEON signed-max instruction. Widening
        // to `I32x4` and comparing there would go through an `f32` round-trip,
        // which cannot represent every `i32` exactly.
        unsafe { I32x2(aarch64::vmax_s32(self.0, other.0)) }
    }

    /// Per-lane signed minimum.
    #[inline]
    pub fn min(self, other: I32x2) -> I32x2 {
        // Integer comparison for the same reason as `max`.
        unsafe { I32x2(aarch64::vmin_s32(self.0, other.0)) }
    }

    /// Concatenates `self` (low lanes) with `other` (high lanes).
    #[inline]
    pub fn concat_xy_xy(self, other: I32x2) -> I32x4 {
        unsafe { I32x4(simd_shuffle4!(self.0, other.0, [0, 1, 2, 3])) }
    }

    /// Casts each lane to `f32` with `simd_cast`.
    #[inline]
    pub fn to_f32x2(self) -> F32x2 {
        unsafe { F32x2(simd_cast(self.0)) }
    }

    /// Widens to four lanes: the two lanes of `self` followed by two zeros.
    #[inline]
    pub fn to_i32x4(self) -> I32x4 {
        self.concat_xy_xy(I32x2::default())
    }
}
impl Default for I32x2 {
#[inline]
fn default() -> I32x2 {
I32x2::splat(0)
}
}
impl PartialEq for I32x2 {
#[inline]
fn eq(&self, other: &I32x2) -> bool {
self.packed_eq(*other).all_true()
}
}
impl Index<usize> for I32x2 {
    type Output = i32;

    /// Borrows lane `index`.
    ///
    /// # Panics
    ///
    /// Panics if `index` is not less than 2.
    #[inline]
    fn index(&self, index: usize) -> &i32 {
        assert!(index < 2);
        let base = &self.0 as *const int32x2_t as *const i32;
        // SAFETY: the assert above keeps the offset inside the two-lane vector.
        unsafe { &*base.add(index) }
    }
}
impl IndexMut<usize> for I32x2 {
    /// Mutably borrows lane `index`.
    ///
    /// # Panics
    ///
    /// Panics if `index` is not less than 2.
    #[inline]
    fn index_mut(&mut self, index: usize) -> &mut i32 {
        assert!(index < 2);
        let base = &mut self.0 as *mut int32x2_t as *mut i32;
        // SAFETY: the assert above keeps the offset inside the two-lane vector.
        unsafe { &mut *base.add(index) }
    }
}
impl Add<I32x2> for I32x2 {
type Output = I32x2;
#[inline]
fn add(self, other: I32x2) -> I32x2 {
unsafe { I32x2(simd_add(self.0, other.0)) }
}
}
impl Sub<I32x2> for I32x2 {
type Output = I32x2;
#[inline]
fn sub(self, other: I32x2) -> I32x2 {
unsafe { I32x2(simd_sub(self.0, other.0)) }
}
}
impl Mul<I32x2> for I32x2 {
type Output = I32x2;
#[inline]
fn mul(self, other: I32x2) -> I32x2 {
unsafe { I32x2(simd_mul(self.0, other.0)) }
}
}
/// Four `i32` lanes stored in a NEON `int32x4_t`.
#[derive(Clone, Copy, Debug)]
pub struct I32x4(pub int32x4_t);
impl I32x4 {
#[inline]
pub fn new(a: i32, b: i32, c: i32, d: i32) -> I32x4 {
unsafe { I32x4(mem::transmute([a, b, c, d])) }
}
#[inline]
pub fn splat(x: i32) -> I32x4 {
I32x4::new(x, x, x, x)
}
#[inline]
pub fn max(self, other: I32x4) -> I32x4 {
unsafe { I32x4(simd_cast(simd_maximum_number_nsz(self.to_f32x4().0, other.to_f32x4().0))) }
}
#[inline]
pub fn min(self, other: I32x4) -> I32x4 {
unsafe { I32x4(simd_cast(simd_minimum_number_nsz(self.to_f32x4().0, other.to_f32x4().0))) }
}
#[inline]
pub fn packed_eq(self, other: I32x4) -> U32x4 {
unsafe { U32x4(simd_eq(self.0, other.0)) }
}
#[inline]
pub fn packed_le(self, other: I32x4) -> U32x4 {
unsafe { U32x4(simd_le(self.0, other.0)) }
}
#[inline]
pub fn packed_lt(self, other: I32x4) -> U32x4 {
unsafe { U32x4(simd_lt(self.0, other.0)) }
}
#[inline]
pub fn concat_xy_xy(self, other: I32x4) -> I32x4 {
unsafe { I32x4(simd_shuffle4!(self.0, other.0, [0, 1, 4, 5])) }
}
#[inline]
pub fn concat_zw_zw(self, other: I32x4) -> I32x4 {
unsafe { I32x4(simd_shuffle4!(self.0, other.0, [2, 3, 6, 7])) }
}
#[inline]
pub fn xy(self) -> I32x2 {
unsafe { I32x2(simd_shuffle2!(self.0, self.0, [0, 1])) }
}
#[inline]
pub fn yx(self) -> I32x2 {
unsafe { I32x2(simd_shuffle2!(self.0, self.0, [1, 0])) }
}
#[inline]
pub fn xw(self) -> I32x2 {
unsafe { I32x2(simd_shuffle2!(self.0, self.0, [0, 3])) }
}
#[inline]
pub fn zy(self) -> I32x2 {
unsafe { I32x2(simd_shuffle2!(self.0, self.0, [2, 1])) }
}
#[inline]
pub fn zw(self) -> I32x2 {
unsafe { I32x2(simd_shuffle2!(self.0, self.0, [2, 3])) }
}
#[inline]
pub fn to_f32x4(self) -> F32x4 {
unsafe { F32x4(simd_cast(self.0)) }
}
}
impl Default for I32x4 {
#[inline]
fn default() -> I32x4 {
I32x4::new(0, 0, 0, 0)
}
}
impl Index<usize> for I32x4 {
    type Output = i32;

    /// Borrows lane `index`.
    ///
    /// # Panics
    ///
    /// Panics if `index` is not less than 4.
    #[inline]
    fn index(&self, index: usize) -> &i32 {
        assert!(index < 4);
        let base = &self.0 as *const int32x4_t as *const i32;
        // SAFETY: the assert above keeps the offset inside the four-lane vector.
        unsafe { &*base.add(index) }
    }
}
impl IndexMut<usize> for I32x4 {
    /// Mutably borrows lane `index`.
    ///
    /// # Panics
    ///
    /// Panics if `index` is not less than 4.
    #[inline]
    fn index_mut(&mut self, index: usize) -> &mut i32 {
        assert!(index < 4);
        let base = &mut self.0 as *mut int32x4_t as *mut i32;
        // SAFETY: the assert above keeps the offset inside the four-lane vector.
        unsafe { &mut *base.add(index) }
    }
}
impl Add<I32x4> for I32x4 {
type Output = I32x4;
#[inline]
fn add(self, other: I32x4) -> I32x4 {
unsafe { I32x4(simd_add(self.0, other.0)) }
}
}
impl Sub<I32x4> for I32x4 {
type Output = I32x4;
#[inline]
fn sub(self, other: I32x4) -> I32x4 {
unsafe { I32x4(simd_sub(self.0, other.0)) }
}
}
impl Mul<I32x4> for I32x4 {
type Output = I32x4;
#[inline]
fn mul(self, other: I32x4) -> I32x4 {
unsafe { I32x4(simd_mul(self.0, other.0)) }
}
}
impl PartialEq for I32x4 {
#[inline]
fn eq(&self, other: &I32x4) -> bool {
self.packed_eq(*other).all_true()
}
}
impl BitAnd<I32x4> for I32x4 {
type Output = I32x4;
#[inline]
fn bitand(self, other: I32x4) -> I32x4 {
unsafe { I32x4(simd_and(self.0, other.0)) }
}
}
impl BitOr<I32x4> for I32x4 {
type Output = I32x4;
#[inline]
fn bitor(self, other: I32x4) -> I32x4 {
unsafe { I32x4(simd_or(self.0, other.0)) }
}
}
impl Shr<I32x4> for I32x4 {
type Output = I32x4;
#[inline]
fn shr(self, other: I32x4) -> I32x4 {
unsafe { I32x4(simd_shr(self.0, other.0)) }
}
}
/// Two `u32` lanes stored in a NEON `uint32x2_t`; the type returned by the
/// two-lane `packed_*` comparisons in this file.
#[derive(Clone, Copy)]
pub struct U32x2(pub uint32x2_t);
impl U32x2 {
#[inline]
pub fn new(x: u32, y: u32) -> U32x2 {
unsafe { U32x2(mem::transmute([x, y])) }
}
#[inline]
pub fn splat(x: u32) -> U32x2 {
U32x2::new(x, x)
}
#[inline]
pub fn all_true(&self) -> bool {
unsafe { aarch64::vminv_u32(self.0) == !0 }
}
#[inline]
pub fn all_false(&self) -> bool {
unsafe { aarch64::vmaxv_u32(self.0) == 0 }
}
#[inline]
pub fn to_i32x2(self) -> I32x2 {
unsafe { I32x2(simd_cast(self.0)) }
}
}
impl Index<usize> for U32x2 {
    type Output = u32;

    /// Borrows lane `index`.
    ///
    /// # Panics
    ///
    /// Panics if `index` is not less than 2.
    #[inline]
    fn index(&self, index: usize) -> &u32 {
        assert!(index < 2);
        let base = &self.0 as *const uint32x2_t as *const u32;
        // SAFETY: the assert above keeps the offset inside the two-lane vector.
        unsafe { &*base.add(index) }
    }
}
impl Not for U32x2 {
type Output = U32x2;
#[inline]
fn not(self) -> U32x2 {
unsafe { U32x2(simd_xor(self.0, U32x2::splat(!0).0)) }
}
}
impl BitAnd<U32x2> for U32x2 {
type Output = U32x2;
#[inline]
fn bitand(self, other: U32x2) -> U32x2 {
unsafe { U32x2(simd_and(self.0, other.0)) }
}
}
impl BitOr<U32x2> for U32x2 {
type Output = U32x2;
#[inline]
fn bitor(self, other: U32x2) -> U32x2 {
unsafe { U32x2(simd_or(self.0, other.0)) }
}
}
/// Four `u32` lanes stored in a NEON `uint32x4_t`; the type returned by the
/// four-lane `packed_*` comparisons in this file.
#[derive(Clone, Copy)]
pub struct U32x4(pub uint32x4_t);
impl U32x4 {
#[inline]
pub fn new(a: u32, b: u32, c: u32, d: u32) -> U32x4 {
unsafe { U32x4(mem::transmute([a, b, c, d])) }
}
#[inline]
pub fn splat(x: u32) -> U32x4 {
U32x4::new(x, x, x, x)
}
#[inline]
pub fn all_true(&self) -> bool {
unsafe { aarch64::vminvq_u32(self.0) == !0 }
}
#[inline]
pub fn all_false(&self) -> bool {
unsafe { aarch64::vmaxvq_u32(self.0) == 0 }
}
#[inline]
pub fn packed_eq(self, other: U32x4) -> U32x4 {
unsafe { U32x4(simd_eq(self.0, other.0)) }
}
}
impl Debug for U32x4 {
    /// Formats the vector as `<lane0, lane1, lane2, lane3>`.
    #[inline]
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        let (a, b, c, d) = (self[0], self[1], self[2], self[3]);
        write!(f, "<{}, {}, {}, {}>", a, b, c, d)
    }
}
impl Index<usize> for U32x4 {
    type Output = u32;

    /// Borrows lane `index`.
    ///
    /// # Panics
    ///
    /// Panics if `index` is not less than 4.
    #[inline]
    fn index(&self, index: usize) -> &u32 {
        assert!(index < 4);
        let base = &self.0 as *const uint32x4_t as *const u32;
        // SAFETY: the assert above keeps the offset inside the four-lane vector.
        unsafe { &*base.add(index) }
    }
}
impl PartialEq for U32x4 {
#[inline]
fn eq(&self, other: &U32x4) -> bool {
self.packed_eq(*other).all_true()
}
}
// Direct bindings to LLVM intrinsics, selected by `link_name`. The `v2f32`
// variants operate on `float32x2_t` and the `v4f32` variants on `float32x4_t`.
extern "C" {
// Two-lane generic float intrinsics.
#[link_name = "llvm.fabs.v2f32"]
fn fabs_v2f32(a: float32x2_t) -> float32x2_t;
#[link_name = "llvm.floor.v2f32"]
fn floor_v2f32(a: float32x2_t) -> float32x2_t;
#[link_name = "llvm.ceil.v2f32"]
fn ceil_v2f32(a: float32x2_t) -> float32x2_t;
#[link_name = "llvm.round.v2f32"]
fn round_v2f32(a: float32x2_t) -> float32x2_t;
#[link_name = "llvm.sqrt.v2f32"]
fn sqrt_v2f32(a: float32x2_t) -> float32x2_t;
// Four-lane generic float intrinsics.
#[link_name = "llvm.fabs.v4f32"]
fn fabs_v4f32(a: float32x4_t) -> float32x4_t;
#[link_name = "llvm.floor.v4f32"]
fn floor_v4f32(a: float32x4_t) -> float32x4_t;
#[link_name = "llvm.ceil.v4f32"]
fn ceil_v4f32(a: float32x4_t) -> float32x4_t;
#[link_name = "llvm.round.v4f32"]
fn round_v4f32(a: float32x4_t) -> float32x4_t;
#[link_name = "llvm.sqrt.v4f32"]
fn sqrt_v4f32(a: float32x4_t) -> float32x4_t;
// AArch64 NEON `frecpe` intrinsics, used by the `approx_recip` methods.
#[link_name = "llvm.aarch64.neon.frecpe.v2f32"]
fn vrecpe_v2f32(a: float32x2_t) -> float32x2_t;
#[link_name = "llvm.aarch64.neon.frecpe.v4f32"]
fn vrecpe_v4f32(a: float32x4_t) -> float32x4_t;
}