diskann_wide/arch/aarch64/
u32x4_.rs1use crate::{
7 Emulated, SIMDDotProduct, SIMDMask, SIMDMulAdd, SIMDPartialEq, SIMDPartialOrd, SIMDSelect,
8 SIMDSumTree, SIMDVector, constant::Const, helpers,
9};
10
11use super::{
13 Neon, internal,
14 macros::{self, AArchLoadStore, AArchSplat},
15 masks::mask32x4,
16 u8x16,
17};
18
19use std::arch::{aarch64::*, asm};
21
// Define the `u32x4` register type wrapping NEON's `uint32x4_t`: 4 lanes of
// `u32`, with `mask32x4` as its mask type, gated on the `Neon` arch token.
macros::aarch64_define_register!(u32x4, uint32x4_t, mask32x4, u32, 4, Neon);
// Splat: broadcast one `u32` into all 4 lanes via `vmovq_n_u32`.
macros::aarch64_define_splat!(u32x4, vmovq_n_u32);
// Loads/stores: full-width via `vld1q_u32`/`vst1q_u32`; partial ("first N
// lanes") loads route through the shared `internal::load_first::u32x4` helper.
macros::aarch64_define_loadstore!(u32x4, vld1q_u32, internal::load_first::u32x4, vst1q_u32, 4);
29
// Lane-wise arithmetic, mapped 1:1 onto NEON intrinsics (feature-gated on "neon").
helpers::unsafe_map_binary_op!(u32x4, std::ops::Add, add, vaddq_u32, "neon");
helpers::unsafe_map_binary_op!(u32x4, std::ops::Sub, sub, vsubq_u32, "neon");
helpers::unsafe_map_binary_op!(u32x4, std::ops::Mul, mul, vmulq_u32, "neon");
// Fused multiply-add via the NEON multiply-accumulate intrinsic `vmlaq_u32`.
macros::aarch64_define_fma!(u32x4, vmlaq_u32);
34
// Lane-wise comparisons producing `mask32x4` results.
// NOTE(review): the parenthesized `(vmvnq_u32)` presumably synthesizes a
// "not equal" from `vceqq_u32` + bitwise-not, since NEON lacks a native
// vector-NE — confirm against the macro definition.
macros::aarch64_define_cmp!(
    u32x4,
    vceqq_u32,
    (vmvnq_u32),
    vcltq_u32,
    vcleq_u32,
    vcgtq_u32,
    vcgeq_u32
);
// Bitwise not/and/or/xor, plus the shift plumbing tuple. NEON shifts take a
// *signed* per-lane shift count (`vshlq_u32`), so the tuple supplies the
// pieces the macro needs: the lane width (32), negation for right shifts
// (`vnegq_s32`), clamping (`vminq_u32`), and the reinterpret casts bridging
// unsigned data lanes and signed shift-count lanes.
// NOTE(review): roles inferred from argument names — macro body not in view.
macros::aarch64_define_bitops!(
    u32x4,
    vmvnq_u32,
    vandq_u32,
    vorrq_u32,
    veorq_u32,
    (
        vshlq_u32,
        32,
        vnegq_s32,
        vminq_u32,
        vreinterpretq_s32_u32,
        std::convert::identity
    ),
    (u32, i32, vmovq_n_s32),
);
60
61impl SIMDSumTree for u32x4 {
62 #[inline(always)]
63 fn sum_tree(self) -> u32 {
64 if cfg!(miri) {
65 self.emulated().sum_tree()
66 } else {
67 unsafe { vaddvq_u32(self.0) }
69 }
70 }
71}
72
73impl SIMDSelect<u32x4> for mask32x4 {
74 #[inline(always)]
75 fn select(self, x: u32x4, y: u32x4) -> u32x4 {
76 u32x4(unsafe { vbslq_u32(self.0, x.0, y.0) })
78 }
79}
80
impl SIMDDotProduct<u8x16, u8x16> for u32x4 {
    /// Accumulating dot product: each `u32` lane is updated with the dot
    /// product of the corresponding group of four `u8` lanes from `left`
    /// and `right` (the semantics of AArch64's `udot`), added to `self`.
    #[inline(always)]
    fn dot_simd(self, left: u8x16, right: u8x16) -> Self {
        if cfg!(miri) {
            // Miri cannot execute inline asm; fall back to the portable
            // emulated implementation and convert back to the SIMD type.
            use crate::AsSIMD;
            self.emulated()
                .dot_simd(left.emulated(), right.emulated())
                .as_simd(self.arch())
        } else {
            // Raw inline asm for `udot` rather than an intrinsic.
            // NOTE(review): presumably because the corresponding `vdotq_u32`
            // intrinsic is unavailable/unstable on this toolchain — confirm.
            #[target_feature(enable = "dotprod")]
            unsafe fn udot(mut s: uint32x4_t, x: uint8x16_t, y: uint8x16_t) -> uint32x4_t {
                unsafe {
                    // Accumulator `s` is both input and output of `udot`,
                    // hence the `inout` operand. `pure, nomem, nostack`:
                    // result depends only on register inputs.
                    asm!(
                        "udot {0:v}.4s, {1:v}.16b, {2:v}.16b",
                        inout(vreg) s,
                        in(vreg) x,
                        in(vreg) y,
                        options(pure, nomem, nostack)
                    );
                }

                s
            }

            Self::from_underlying(self.arch(), unsafe { udot(self.0, left.0, right.0) })
        }
    }
}
116
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{arch::aarch64::test_neon, reference::ReferenceScalarOps, test_utils};

    // `test_neon()` yields `Some(arch)` only when NEON is usable, so each
    // explicit test below silently no-ops on hosts without it.

    #[test]
    fn miri_test_load() {
        if let Some(arch) = test_neon() {
            test_utils::test_load_simd::<u32, 4, u32x4>(arch);
        }
    }

    #[test]
    fn miri_test_store() {
        if let Some(arch) = test_neon() {
            test_utils::test_store_simd::<u32, 4, u32x4>(arch);
        }
    }

    #[test]
    fn test_constructors() {
        if let Some(arch) = test_neon() {
            test_utils::ops::test_splat::<u32, 4, u32x4>(arch);
        }
    }

    // Macro-generated property tests against the scalar reference ops.
    // The hex constants are presumably fixed RNG seeds for reproducible
    // random inputs — keep them byte-identical.
    test_utils::ops::test_add!(u32x4, 0x3017fd73c99cc633, test_neon());
    test_utils::ops::test_sub!(u32x4, 0xfc627f10b5f8db8a, test_neon());
    test_utils::ops::test_mul!(u32x4, 0x0f4caa80eceaa523, test_neon());
    test_utils::ops::test_fma!(u32x4, 0xb8f702ba85375041, test_neon());

    test_utils::ops::test_cmp!(u32x4, 0x941757bd5cc641a1, test_neon());

    test_utils::dot_product::test_dot_product!(
        (u8x16, u8x16) => u32x4,
        0x145f89b446c03ff1,
        test_neon()
    );

    test_utils::ops::test_bitops!(u32x4, 0xd62d8de09f82ed4e, test_neon());
    test_utils::ops::test_select!(u32x4, 0xd62d8de09f82ed4e, test_neon());

    test_utils::ops::test_sumtree!(u32x4, 0xb9ac82ab23a855da, test_neon());
}