use crate::pessimize_asm_values;
#[cfg(all(target_arch = "aarch64", any(target_feature = "neon", doc)))]
use crate::pessimize_tuple_structs;
#[cfg(all(target_arch = "aarch64", any(target_feature = "neon", doc)))]
use core::arch::aarch64::{
float64x1_t, float64x1x2_t, float64x1x3_t, float64x1x4_t, float64x2_t, float64x2x2_t,
float64x2x3_t, float64x2x4_t,
};
// Integer types of at most 32 bits, plus the pointer-sized integers, are handed
// to `pessimize_asm_values!` under the `reg` register class. This invocation
// carries no `cfg` gate, so it applies to every target this file is built for.
// NOTE(review): the macro is defined elsewhere; presumably it implements the
// crate's optimization-barrier trait for each listed type by routing values
// through an inline-asm operand of the named register class — confirm there.
pessimize_asm_values!(allow(missing_docs) { reg: (i8, u8, i16, u16, i32, u32, isize, usize) });
// 64-bit integers use the `reg` class only when targeting aarch64.
#[cfg(target_arch = "aarch64")]
pessimize_asm_values!(allow(missing_docs) { reg: (i64, u64) });
// On 32-bit arm, 64-bit integers use the `dreg` class instead. The invocation is
// compiled only when both the `vfp2` and `d32` target features are enabled, or
// when `doc` is set. The `doc(cfg(...))` argument restates the feature
// requirement (presumably forwarded to the generated items so rendered
// documentation can show it — confirm in the macro definition).
#[cfg(all(
target_arch = "arm",
any(all(target_feature = "vfp2", target_feature = "d32"), doc)
))]
pessimize_asm_values!(
doc(cfg(all(target_feature = "vfp2", target_feature = "d32")))
{ dreg: (i64, u64) }
);
// Scalar floating-point types. Each architecture gets a pair of mutually
// exclusive invocations: one for when the relevant target feature is enabled
// (or `doc` is set), and a `reg`-class fallback for the exact complement.
//
// aarch64 with `neon` (or `doc`): f32 and f64 use the `vreg` class.
#[cfg(all(target_arch = "aarch64", any(target_feature = "neon", doc)))]
pessimize_asm_values!(allow(missing_docs) { vreg: (f32, f64) });
// aarch64 without `neon` and without `doc`: f32 and f64 fall back to `reg`.
#[cfg(all(target_arch = "aarch64", not(target_feature = "neon"), not(doc)))]
pessimize_asm_values!(allow(missing_docs) { reg: (f32, f64) });
// arm with `vfp2` (or `doc`): f32 uses the `sreg` class.
#[cfg(all(target_arch = "arm", any(target_feature = "vfp2", doc)))]
pessimize_asm_values!(allow(missing_docs) { sreg: (f32) });
// arm with both `vfp2` and `d32` (or `doc`): f64 uses the `dreg` class, and the
// feature requirement is restated through `doc(cfg(...))`.
// NOTE(review): nothing in this section covers f64 on arm when `vfp2`+`d32`
// are unavailable — confirm whether that is intentional.
#[cfg(all(
target_arch = "arm",
any(all(target_feature = "vfp2", target_feature = "d32"), doc)
))]
pessimize_asm_values!(
doc(cfg(all(target_feature = "vfp2", target_feature = "d32")))
{ dreg: (f64) }
);
// arm without `vfp2` and without `doc`: f32 falls back to `reg`.
#[cfg(all(target_arch = "arm", not(target_feature = "vfp2"), not(doc)))]
pessimize_asm_values!(allow(missing_docs) { reg: (f32) });
// aarch64 NEON vector types. Both invocations below are compiled only on
// aarch64 with the `neon` target feature enabled (or when `doc` is set), and
// both pass `doc(cfg(target_feature = "neon"))` to restate that requirement.
//
// The single-vector types `float64x1_t` and `float64x2_t` go through
// `pessimize_asm_values!` under the `vreg` class.
#[cfg(all(target_arch = "aarch64", any(target_feature = "neon", doc)))]
pessimize_asm_values!(
doc(cfg(target_feature = "neon"))
{ vreg: (float64x1_t, float64x2_t) }
);
// The multi-vector types (`float64xNxM_t`) go through `pessimize_tuple_structs!`
// instead. Each entry lists one `name: type` pair per component vector (two,
// three or four of them), all of the matching single-vector type.
// NOTE(review): the names `a`..`d` are presumably just bindings the macro uses
// to destructure the tuple-struct fields in order — confirm in the macro.
#[cfg(all(target_arch = "aarch64", any(target_feature = "neon", doc)))]
pessimize_tuple_structs!(
doc(cfg(target_feature = "neon"))
{
float64x1x2_t { a: float64x1_t, b: float64x1_t },
float64x1x3_t { a: float64x1_t, b: float64x1_t, c: float64x1_t },
float64x1x4_t { a: float64x1_t, b: float64x1_t, c: float64x1_t, d: float64x1_t },
float64x2x2_t { a: float64x2_t, b: float64x2_t },
float64x2x3_t { a: float64x2_t, b: float64x2_t, c: float64x2_t },
float64x2x4_t { a: float64x2_t, b: float64x2_t, c: float64x2_t, d: float64x2_t }
}
);
// Support for `core::simd::Simd` types, only built with the crate's `nightly`
// feature. `unused` is allowed because the imports below are unconditional
// while their only user (the macro invocation) is gated on aarch64 + `neon`,
// so on other configurations they would otherwise trigger warnings.
#[allow(unused)]
#[cfg(feature = "nightly")]
mod portable_simd {
use crate::{arch::arm_family::*, pessimize_into_from};
use core::simd::Simd;
// Pairs each NEON vector type with the `Simd` type of the same element type
// and lane count. Compiled only on aarch64 with `neon` (or when `doc` is set);
// `doc(cfg(...))` restates both the `nightly` and `neon` requirements.
// NOTE(review): presumably the macro implements pessimization for the `Simd`
// type by converting to/from the listed NEON type via `Into`/`From` — confirm
// in the `pessimize_into_from!` definition.
#[cfg(all(target_arch = "aarch64", any(target_feature = "neon", doc)))]
pessimize_into_from!(
doc(cfg(all(feature = "nightly", target_feature = "neon")))
{
float64x1_t: (Simd<f64, 1>),
float64x2_t: (Simd<f64, 2>)
}
);
}
// `unused` is allowed because the imports below are only consumed by the
// aarch64 + NEON submodule; on any other configuration that submodule is
// compiled out and the imports would otherwise produce warnings.
#[allow(unused)]
#[cfg(test)]
mod tests {
    use super::*;
    use crate::pessimize_newtypes;
    use crate::tests::{test_simd, test_unoptimized_value_type};

    // Tests for the NEON f64 vector types; only built when targeting aarch64
    // with the `neon` target feature enabled.
    #[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
    mod neon {
        use super::*;
        use core::arch::aarch64;
        #[cfg(feature = "nightly")]
        use core::simd::Simd;

        // Runs the crate's `test_simd` helper once per wrapper type (and, with
        // the `nightly` feature, once per matching `Simd` type), always with
        // `f64::MIN` and `f64::MAX` as the two scalar arguments.
        #[test]
        fn neon() {
            let (lo, hi) = (f64::MIN, f64::MAX);

            // Single-vector types.
            test_simd::<f64, 1, TestableF64x1>(lo, hi);
            test_simd::<f64, 2, TestableF64x2>(lo, hi);

            // Multi-vector types built from `float64x1_t`.
            test_simd::<f64, 2, TestableF64x1x2>(lo, hi);
            test_simd::<f64, 3, TestableF64x1x3>(lo, hi);
            test_simd::<f64, 4, TestableF64x1x4>(lo, hi);

            // Multi-vector types built from `float64x2_t`.
            test_simd::<f64, 4, TestableF64x2x2>(lo, hi);
            test_simd::<f64, 6, TestableF64x2x3>(lo, hi);
            test_simd::<f64, 8, TestableF64x2x4>(lo, hi);

            #[cfg(feature = "nightly")]
            {
                test_simd::<f64, 1, Simd<f64, 1>>(lo, hi);
                test_simd::<f64, 2, Simd<f64, 2>>(lo, hi);
            }
        }

        // Runs the crate's `test_unoptimized_value_type` helper for the same
        // set of types as `neon`. Marked `#[ignore]`, so it only runs when
        // ignored tests are explicitly requested.
        // NOTE(review): presumably ignored because it depends on optimizer
        // behavior and needs a dedicated build configuration — confirm.
        #[test]
        #[ignore]
        fn neon_optim() {
            // Single-vector types.
            test_unoptimized_value_type::<TestableF64x1>();
            test_unoptimized_value_type::<TestableF64x2>();

            // Multi-vector types.
            test_unoptimized_value_type::<TestableF64x1x2>();
            test_unoptimized_value_type::<TestableF64x1x3>();
            test_unoptimized_value_type::<TestableF64x1x4>();
            test_unoptimized_value_type::<TestableF64x2x2>();
            test_unoptimized_value_type::<TestableF64x2x3>();
            test_unoptimized_value_type::<TestableF64x2x4>();

            #[cfg(feature = "nightly")]
            {
                test_unoptimized_value_type::<Simd<f64, 1>>();
                test_unoptimized_value_type::<Simd<f64, 2>>();
            }
        }

        // Generates, for each `(name, inner, lanes, load, store)` tuple, a
        // newtype `name(inner)` that can be built from `[f64; lanes]`, has a
        // `Default`, and compares by lane values, then registers it with
        // `pessimize_newtypes!`.
        //
        // `load`/`store` name intrinsics in `core::arch::aarch64`. Whoever
        // invokes this macro must pair `lanes` with intrinsics that read/write
        // exactly that many contiguous f64 values.
        macro_rules! abstract_float64xN_t {
            (
                $(
                    ($name:ident, $inner:ident, $lanes:expr, $load:ident, $store:ident)
                ),*
            ) => {
                $(
                    #[derive(Clone, Copy, Debug)]
                    struct $name($inner);

                    impl From<[f64; $lanes]> for $name {
                        #[inline]
                        fn from(values: [f64; $lanes]) -> Self {
                            // SAFETY: `transmute` only compiles when the array and
                            // `$inner` have identical sizes, so this is a plain
                            // byte-for-byte reinterpretation of f64 data.
                            let staged: $inner = unsafe { core::mem::transmute(values) };
                            let src = (&staged as *const $inner).cast::<f64>();
                            // SAFETY: `src` points at the live local `staged`, whose
                            // size equals `$lanes` f64 values (checked by the
                            // transmute above); `$load` is expected to read exactly
                            // that many.
                            Self(unsafe { aarch64::$load(src) })
                        }
                    }

                    impl Default for $name {
                        #[inline]
                        fn default() -> Self {
                            [0.0_f64; $lanes].into()
                        }
                    }

                    impl PartialEq for $name {
                        #[inline]
                        fn eq(&self, other: &Self) -> bool {
                            // Spills a vector to memory and returns its lanes as a
                            // plain array, so comparison uses ordinary f64 equality.
                            fn unpack(v: &$name) -> [f64; $lanes] {
                                let mut scratch = $name::from([0.0_f64; $lanes]);
                                // SAFETY: `scratch` is a live, writable local. The
                                // final transmute only compiles when `$name` and
                                // `[f64; $lanes]` have the same size, so a `$store`
                                // that writes `$lanes` f64 values stays in bounds.
                                unsafe {
                                    aarch64::$store(
                                        (&mut scratch) as *mut $name as *mut f64,
                                        v.0,
                                    );
                                    core::mem::transmute::<$name, [f64; $lanes]>(scratch)
                                }
                            }
                            unpack(self) == unpack(other)
                        }
                    }

                    pessimize_newtypes!( allow(missing_docs) { $name{ $inner } } );
                )*
            };
        }

        // One wrapper per NEON f64 vector type: (wrapper, wrapped type, total
        // f64 lane count, load intrinsic, store intrinsic).
        abstract_float64xN_t!(
            (TestableF64x1, float64x1_t, 1, vld1_f64, vst1_f64),
            (TestableF64x2, float64x2_t, 2, vld1q_f64, vst1q_f64),
            (TestableF64x1x2, float64x1x2_t, 2, vld1_f64_x2, vst1_f64_x2),
            (TestableF64x1x3, float64x1x3_t, 3, vld1_f64_x3, vst1_f64_x3),
            (TestableF64x1x4, float64x1x4_t, 4, vld1_f64_x4, vst1_f64_x4),
            (TestableF64x2x2, float64x2x2_t, 4, vld1q_f64_x2, vst1q_f64_x2),
            (TestableF64x2x3, float64x2x3_t, 6, vld1q_f64_x3, vst1q_f64_x3),
            (TestableF64x2x4, float64x2x4_t, 8, vld1q_f64_x4, vst1q_f64_x4)
        );
    }
}