Skip to main content

diskann_wide/arch/aarch64/
f32x2_.rs

1/*
2 * Copyright (c) Microsoft Corporation. All rights reserved.
3 * Licensed under the MIT license.
4 */
5
6use crate::{
7    Emulated,
8    constant::Const,
9    helpers,
10    traits::{SIMDMask, SIMDMulAdd, SIMDPartialEq, SIMDPartialOrd, SIMDSumTree, SIMDVector},
11};
12
// AArch64 support: `Neon` token, internals, macros, and masks
14use super::{
15    Neon, internal,
16    macros::{self, AArchLoadStore, AArchSplat},
17    masks::mask32x2,
18};
19
20// AArch64 intrinsics
21use std::arch::aarch64::*;
22
///////////////////////////
// 32-bit floating point //
///////////////////////////
26
// Core definitions for `f32x2`. The macro bodies live elsewhere, so the notes below are
// read off the macro names and arguments only — TODO confirm against `super::macros`.
// - `aarch64_define_register!`: presumably declares `f32x2` as a 2-lane `f32` vector backed
//   by NEON's `float32x2_t`, associated with the `mask32x2` mask type and the `Neon`
//   architecture token.
// - `aarch64_define_splat!`: presumably broadcasts a single `f32` to both lanes using
//   `vmov_n_f32`.
// - `aarch64_define_loadstore!`: presumably wires loads to `vld1_f32`, stores to `vst1_f32`,
//   and uses `internal::load_first::f32x2` for the partial-load path.
macros::aarch64_define_register!(f32x2, float32x2_t, mask32x2, f32, 2, Neon);
macros::aarch64_define_splat!(f32x2, vmov_n_f32);
macros::aarch64_define_loadstore!(f32x2, vld1_f32, internal::load_first::f32x2, vst1_f32, 2);
30
// Lane-wise arithmetic. Each `unsafe_map_binary_op!` line pairs a `std::ops` trait and its
// method name with a NEON intrinsic and the "neon" target feature; presumably the macro
// expands to the operator impl that forwards to that intrinsic (confirm in `helpers`).
helpers::unsafe_map_binary_op!(f32x2, std::ops::Add, add, vadd_f32, "neon");
helpers::unsafe_map_binary_op!(f32x2, std::ops::Sub, sub, vsub_f32, "neon");
helpers::unsafe_map_binary_op!(f32x2, std::ops::Mul, mul, vmul_f32, "neon");
// Fused multiply-add via `vfma_f32` (presumably provides `SIMDMulAdd` — TODO confirm).
macros::aarch64_define_fma!(f32x2, vfma_f32);
35
// Comparisons for `f32x2`. The intrinsics are passed in this order: equality (`vceq_f32`),
// a parenthesized `vmvn_u32` (bitwise NOT on the 32-bit mask lanes; presumably applied to
// the equality result to obtain "not equal" — TODO confirm against the macro), then
// less-than, less-or-equal, greater-than, and greater-or-equal.
macros::aarch64_define_cmp!(
    f32x2,
    vceq_f32,
    (vmvn_u32),
    vclt_f32,
    vcle_f32,
    vcgt_f32,
    vcge_f32
);
45
46impl SIMDSumTree for f32x2 {
47    #[inline(always)]
48    fn sum_tree(self) -> f32 {
49        if cfg!(miri) {
50            self.emulated().sum_tree()
51        } else {
52            // SAFETY: The presence of `Neon` enables the use of "neon" intrinsics.
53            unsafe { vaddv_f32(self.to_underlying()) }
54        }
55    }
56}
57
58///////////
59// Tests //
60///////////
61
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{arch::aarch64::test_neon, reference::ReferenceScalarOps, test_utils};

    // Each hand-written test below returns early when `test_neon()` yields `None`.

    /// Runs the shared SIMD load checks for `f32x2`.
    #[test]
    fn miri_test_load() {
        let Some(arch) = test_neon() else {
            return;
        };
        test_utils::test_load_simd::<f32, 2, f32x2>(arch);
    }

    /// Runs the shared SIMD store checks for `f32x2`.
    #[test]
    fn miri_test_store() {
        let Some(arch) = test_neon() else {
            return;
        };
        test_utils::test_store_simd::<f32, 2, f32x2>(arch);
    }

    /// Runs the shared splat (constructor) checks for `f32x2`.
    #[test]
    fn test_constructors() {
        let Some(arch) = test_neon() else {
            return;
        };
        test_utils::ops::test_splat::<f32, 2, f32x2>(arch);
    }

    // Arithmetic operators. The hex literal is passed through to the test macro unchanged
    // (presumably an RNG seed — confirm in `test_utils::ops`).
    test_utils::ops::test_add!(f32x2, 0xcd7a8fea9a3fb727, test_neon());
    test_utils::ops::test_sub!(f32x2, 0x3f6562c94c923238, test_neon());
    test_utils::ops::test_mul!(f32x2, 0x07e48666c0fc564c, test_neon());
    test_utils::ops::test_fma!(f32x2, 0xcfde9d031302cf2c, test_neon());

    // Comparisons.
    test_utils::ops::test_cmp!(f32x2, 0xc4f468b224622326, test_neon());

    // Horizontal reduction.
    test_utils::ops::test_sumtree!(f32x2, 0x828bd890a470dc4d, test_neon());
}