// ggmath/vector/primitive_impls/u32/b128_sse2.rs

use_core_arch_x86! {
    __m128i,
    _mm_add_epi32,
    _mm_and_si128,
    _mm_or_si128,
    _mm_set1_epi32,
    _mm_set_epi32,
    _mm_sll_epi32,
    _mm_sub_epi32,
    _mm_sra_epi32,
    _mm_xor_si128,
}

use core::arch::asm;

use crate::{
    SimdBehaviour, Vec2, Vec3, Vec4, Vector,
    vector::{SoundVectorRepr, primitive_impls::use_core_arch_x86, vec2, vec3, vec4},
};
// SAFETY: `__m128i` is a 128-bit integer register type with no invalid bit
// patterns, used here as the backing storage for four `u32` lanes.
// NOTE(review): the exact soundness contract of `SoundVectorRepr` is declared
// elsewhere in the crate — confirm this impl against that trait's `# Safety` docs.
unsafe impl SoundVectorRepr<4, u32> for __m128i {}
23
// SAFETY: a `Vec3<u32>` is stored in an `__m128i` with the fourth (w) lane used
// only as padding — see `SimdBehaviour<3>::vec_from_array` below, which always
// writes a defined value into that lane.
// NOTE(review): confirm against the `SoundVectorRepr` trait's `# Safety` docs.
unsafe impl SoundVectorRepr<3, u32> for __m128i {}
26
27impl SimdBehaviour<4> for u32 {
32 type VectorRepr = __m128i;
33
34 #[inline(always)]
35 fn vec_from_array(array: [Self; 4]) -> Vec4<Self> {
36 Vector::from_repr(unsafe {
37 _mm_set_epi32(
38 array[3].cast_signed(),
39 array[2].cast_signed(),
40 array[1].cast_signed(),
41 array[0].cast_signed(),
42 )
43 })
44 }
45
46 #[inline(always)]
47 fn vec_splat(value: Self) -> Vec4<Self> {
48 Vector::from_repr(unsafe { _mm_set1_epi32(value.cast_signed()) })
49 }
50
51 #[inline(always)]
52 unsafe fn vec_swizzle2<const X_SRC: usize, const Y_SRC: usize>(vec: Vec4<Self>) -> Vec2<Self> {
53 vec2!(vec[X_SRC], vec[Y_SRC])
54 }
55
56 #[inline(always)]
57 unsafe fn vec_swizzle3<const X_SRC: usize, const Y_SRC: usize, const Z_SRC: usize>(
58 vec: Vec4<Self>,
59 ) -> Vec3<Self> {
60 let result_as_vec4 = vec.swizzle4::<X_SRC, Y_SRC, Z_SRC, Z_SRC>();
61
62 Vector::from_repr(result_as_vec4.repr())
63 }
64
65 #[inline(always)]
66 unsafe fn vec_swizzle4<
67 const X_SRC: usize,
68 const Y_SRC: usize,
69 const Z_SRC: usize,
70 const W_SRC: usize,
71 >(
72 vec: Vec4<Self>,
73 ) -> Vec4<Self> {
74 let result: __m128i;
75 unsafe {
77 asm!("pshufd {0}, {0}, {1}", inout(xmm_reg) vec.repr() => result, const {
78 let x_src_bits = (X_SRC as u32) << 0;
79 let y_src_bits = (Y_SRC as u32) << 2;
80 let z_src_bits = (Z_SRC as u32) << 4;
81 let w_src_bits = (W_SRC as u32) << 6;
82
83 (x_src_bits | y_src_bits | z_src_bits | w_src_bits).cast_signed()
84 });
85 }
86
87 Vector::from_repr(result)
88 }
89
90 #[inline(always)]
93 fn vec_not(vec: Vec4<Self>) -> Vec4<Self> {
94 Vector::from_repr(unsafe { _mm_xor_si128(vec.repr(), vec4!(-1).repr()) })
95 }
96
97 #[inline(always)]
98 fn vec_add(vec: Vec4<Self>, rhs: Vec4<Self>) -> Vec4<Self> {
99 if cfg!(debug_assertions) {
100 vec4!(vec.x + rhs.x, vec.y + rhs.y, vec.z + rhs.z, vec.w + rhs.w)
101 } else {
102 Vector::from_repr(unsafe { _mm_add_epi32(vec.repr(), rhs.repr()) })
103 }
104 }
105
106 #[inline(always)]
107 fn vec_sub(vec: Vec4<Self>, rhs: Vec4<Self>) -> Vec4<Self> {
108 if cfg!(debug_assertions) {
109 vec4!(vec.x - rhs.x, vec.y - rhs.y, vec.z - rhs.z, vec.w - rhs.w)
110 } else {
111 Vector::from_repr(unsafe { _mm_sub_epi32(vec.repr(), rhs.repr()) })
112 }
113 }
114
115 #[inline(always)]
116 fn vec_mul(vec: Vec4<Self>, rhs: Vec4<Self>) -> Vec4<Self> {
117 vec4!(vec.x * rhs.x, vec.y * rhs.y, vec.z * rhs.z, vec.w * rhs.w)
120 }
121
122 #[inline(always)]
123 fn vec_div(vec: Vec4<Self>, rhs: Vec4<Self>) -> Vec4<Self> {
124 vec4!(vec.x / rhs.x, vec.y / rhs.y, vec.z / rhs.z, vec.w / rhs.w)
125 }
126
127 #[inline(always)]
128 fn vec_rem(vec: Vec4<Self>, rhs: Vec4<Self>) -> Vec4<Self> {
129 vec4!(vec.x % rhs.x, vec.y % rhs.y, vec.z % rhs.z, vec.w % rhs.w)
130 }
131
132 #[inline(always)]
133 fn vec_shl(vec: Vec4<Self>, rhs: Vec4<Self>) -> Vec4<Self> {
134 Vector::from_repr(unsafe { _mm_sll_epi32(vec.repr(), rhs.repr()) })
135 }
136
137 #[inline(always)]
138 fn vec_shr(vec: Vec4<Self>, rhs: Vec4<Self>) -> Vec4<Self> {
139 Vector::from_repr(unsafe { _mm_sra_epi32(vec.repr(), rhs.repr()) })
140 }
141
142 #[inline(always)]
143 fn vec_bitand(vec: Vec4<Self>, rhs: Vec4<Self>) -> Vec4<Self> {
144 Vector::from_repr(unsafe { _mm_and_si128(vec.repr(), rhs.repr()) })
145 }
146
147 #[inline(always)]
148 fn vec_bitor(vec: Vec4<Self>, rhs: Vec4<Self>) -> Vec4<Self> {
149 Vector::from_repr(unsafe { _mm_or_si128(vec.repr(), rhs.repr()) })
150 }
151
152 #[inline(always)]
153 fn vec_bitxor(vec: Vec4<Self>, rhs: Vec4<Self>) -> Vec4<Self> {
154 Vector::from_repr(unsafe { _mm_xor_si128(vec.repr(), rhs.repr()) })
155 }
156}
157
158impl SimdBehaviour<3> for u32 {
163 type VectorRepr = __m128i;
164
165 #[inline(always)]
166 fn vec_from_array(array: [Self; 3]) -> Vec3<Self> {
167 Vector::from_repr(unsafe {
168 _mm_set_epi32(
169 array[2].cast_signed(),
170 array[2].cast_signed(),
171 array[1].cast_signed(),
172 array[0].cast_signed(),
173 )
174 })
175 }
176
177 #[inline(always)]
178 fn vec_splat(value: Self) -> Vec3<Self> {
179 Vector::from_repr(unsafe { _mm_set1_epi32(value.cast_signed()) })
180 }
181
182 #[inline(always)]
183 unsafe fn vec_swizzle2<const X_SRC: usize, const Y_SRC: usize>(vec: Vec3<Self>) -> Vec2<Self> {
184 vec2!(vec[X_SRC], vec[Y_SRC])
185 }
186
187 #[inline(always)]
188 unsafe fn vec_swizzle3<const X_SRC: usize, const Y_SRC: usize, const Z_SRC: usize>(
189 vec: Vec3<Self>,
190 ) -> Vec3<Self> {
191 let vec_as_vec4 = Vec4::<Self>::from_repr(vec.repr());
192 let result_as_vec4 = vec_as_vec4.swizzle4::<X_SRC, Y_SRC, Z_SRC, Z_SRC>();
193
194 Vector::from_repr(result_as_vec4.repr())
195 }
196
197 #[inline(always)]
198 unsafe fn vec_swizzle4<
199 const X_SRC: usize,
200 const Y_SRC: usize,
201 const Z_SRC: usize,
202 const W_SRC: usize,
203 >(
204 vec: Vec3<Self>,
205 ) -> Vec4<Self> {
206 let vec_as_vec4 = Vec4::<Self>::from_repr(vec.repr());
207
208 vec_as_vec4.swizzle4::<X_SRC, Y_SRC, Z_SRC, W_SRC>()
209 }
210
211 #[inline(always)]
214 fn vec_not(vec: Vec3<Self>) -> Vec3<Self> {
215 Vector::from_repr(unsafe { _mm_xor_si128(vec.repr(), vec3!(-1).repr()) })
216 }
217
218 #[inline(always)]
219 fn vec_add(vec: Vec3<Self>, rhs: Vec3<Self>) -> Vec3<Self> {
220 if cfg!(debug_assertions) {
221 vec3!(vec.x + rhs.x, vec.y + rhs.y, vec.z + rhs.z)
222 } else {
223 Vector::from_repr(unsafe { _mm_add_epi32(vec.repr(), rhs.repr()) })
224 }
225 }
226
227 #[inline(always)]
228 fn vec_sub(vec: Vec3<Self>, rhs: Vec3<Self>) -> Vec3<Self> {
229 if cfg!(debug_assertions) {
230 vec3!(vec.x - rhs.x, vec.y - rhs.y, vec.z - rhs.z)
231 } else {
232 Vector::from_repr(unsafe { _mm_sub_epi32(vec.repr(), rhs.repr()) })
233 }
234 }
235
236 #[inline(always)]
237 fn vec_mul(vec: Vec3<Self>, rhs: Vec3<Self>) -> Vec3<Self> {
238 vec3!(vec.x * rhs.x, vec.y * rhs.y, vec.z * rhs.z)
241 }
242
243 #[inline(always)]
244 fn vec_div(vec: Vec3<Self>, rhs: Vec3<Self>) -> Vec3<Self> {
245 vec3!(vec.x / rhs.x, vec.y / rhs.y, vec.z / rhs.z)
246 }
247
248 #[inline(always)]
249 fn vec_rem(vec: Vec3<Self>, rhs: Vec3<Self>) -> Vec3<Self> {
250 vec3!(vec.x % rhs.x, vec.y % rhs.y, vec.z % rhs.z)
251 }
252
253 #[inline(always)]
254 fn vec_shl(vec: Vec3<Self>, rhs: Vec3<Self>) -> Vec3<Self> {
255 Vector::from_repr(unsafe { _mm_sll_epi32(vec.repr(), rhs.repr()) })
256 }
257
258 #[inline(always)]
259 fn vec_shr(vec: Vec3<Self>, rhs: Vec3<Self>) -> Vec3<Self> {
260 Vector::from_repr(unsafe { _mm_sra_epi32(vec.repr(), rhs.repr()) })
261 }
262
263 #[inline(always)]
264 fn vec_bitand(vec: Vec3<Self>, rhs: Vec3<Self>) -> Vec3<Self> {
265 Vector::from_repr(unsafe { _mm_and_si128(vec.repr(), rhs.repr()) })
266 }
267
268 #[inline(always)]
269 fn vec_bitor(vec: Vec3<Self>, rhs: Vec3<Self>) -> Vec3<Self> {
270 Vector::from_repr(unsafe { _mm_or_si128(vec.repr(), rhs.repr()) })
271 }
272
273 #[inline(always)]
274 fn vec_bitxor(vec: Vec3<Self>, rhs: Vec3<Self>) -> Vec3<Self> {
275 Vector::from_repr(unsafe { _mm_xor_si128(vec.repr(), rhs.repr()) })
276 }
277}