1use super::*;
2
3pick! {
4 if #[cfg(target_feature="sse2")] {
5 #[derive(Default, Clone, Copy, PartialEq, Eq)]
6 #[repr(C, align(16))]
7 pub struct u32x4 { pub(crate) sse: m128i }
8 } else if #[cfg(target_feature="simd128")] {
9 use core::arch::wasm32::*;
10
11 #[derive(Clone, Copy)]
12 #[repr(transparent)]
13 pub struct u32x4 { pub(crate) simd: v128 }
14
15 impl Default for u32x4 {
16 fn default() -> Self {
17 Self::splat(0)
18 }
19 }
20
21 impl PartialEq for u32x4 {
22 fn eq(&self, other: &Self) -> bool {
23 u32x4_all_true(u32x4_eq(self.simd, other.simd))
24 }
25 }
26
27 impl Eq for u32x4 { }
28 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
29 use core::arch::aarch64::*;
30 #[repr(C)]
31 #[derive(Copy, Clone)]
32 pub struct u32x4 { pub(crate) neon : uint32x4_t }
33
34 impl Default for u32x4 {
35 #[inline]
36 fn default() -> Self {
37 Self::splat(0)
38 }
39 }
40
41 impl PartialEq for u32x4 {
42 #[inline]
43 fn eq(&self, other: &Self) -> bool {
44 unsafe { vminvq_u32(vceqq_u32(self.neon, other.neon))==u32::MAX }
45 }
46 }
47
48 impl Eq for u32x4 { }
49} else {
50 #[derive(Default, Clone, Copy, PartialEq, Eq)]
51 #[repr(C, align(16))]
52 pub struct u32x4 { arr: [u32;4] }
53 }
54}
55
// NOTE(review): `int_uint_consts!` is defined elsewhere in the crate; it
// presumably generates the shared constants/constructors (e.g. the `splat`
// used throughout this file) for a 4-lane, 128-bit `u32` vector — confirm
// against the macro definition.
int_uint_consts!(u32, 4, u32x4, 128);

// SAFETY: every representation of `u32x4` declared above is a single field
// holding 128 bits of integer lane data (`m128i`, `v128`, `uint32x4_t` or
// `[u32; 4]`), so the all-zero bit pattern is a valid value.
unsafe impl Zeroable for u32x4 {}
// SAFETY: same reasoning as for `Zeroable`. NOTE(review): `Pod` additionally
// requires that the backing vector types have no padding and accept any bit
// pattern — true for plain integer vectors, but confirm for each backend.
unsafe impl Pod for u32x4 {}
60
// Declares `u32` as the element type used when aligning slices to `u32x4`.
// NOTE(review): the `AlignTo` trait itself is defined elsewhere; see its
// definition for the methods this associated type feeds into.
impl AlignTo for u32x4 {
  type Elem = u32;
}
64
65impl Add for u32x4 {
66 type Output = Self;
67 #[inline]
68 fn add(self, rhs: Self) -> Self::Output {
69 pick! {
70 if #[cfg(target_feature="sse2")] {
71 Self { sse: add_i32_m128i(self.sse, rhs.sse) }
72 } else if #[cfg(target_feature="simd128")] {
73 Self { simd: u32x4_add(self.simd, rhs.simd) }
74 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
75 unsafe { Self { neon: vaddq_u32(self.neon, rhs.neon) } }
76 } else {
77 Self { arr: [
78 self.arr[0].wrapping_add(rhs.arr[0]),
79 self.arr[1].wrapping_add(rhs.arr[1]),
80 self.arr[2].wrapping_add(rhs.arr[2]),
81 self.arr[3].wrapping_add(rhs.arr[3]),
82 ]}
83 }
84 }
85 }
86}
87
88impl Sub for u32x4 {
89 type Output = Self;
90 #[inline]
91 fn sub(self, rhs: Self) -> Self::Output {
92 pick! {
93 if #[cfg(target_feature="sse2")] {
94 Self { sse: sub_i32_m128i(self.sse, rhs.sse) }
95 } else if #[cfg(target_feature="simd128")] {
96 Self { simd: u32x4_sub(self.simd, rhs.simd) }
97 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
98 unsafe {Self { neon: vsubq_u32(self.neon, rhs.neon) }}
99 } else {
100 Self { arr: [
101 self.arr[0].wrapping_sub(rhs.arr[0]),
102 self.arr[1].wrapping_sub(rhs.arr[1]),
103 self.arr[2].wrapping_sub(rhs.arr[2]),
104 self.arr[3].wrapping_sub(rhs.arr[3]),
105 ]}
106 }
107 }
108 }
109}
110
111impl Mul for u32x4 {
112 type Output = Self;
113 #[inline]
114 fn mul(self, rhs: Self) -> Self::Output {
115 pick! {
116 if #[cfg(target_feature="sse4.1")] {
117 Self { sse: mul_32_m128i(self.sse, rhs.sse) }
118 } else if #[cfg(target_feature="simd128")] {
119 Self { simd: u32x4_mul(self.simd, rhs.simd) }
120 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
121 unsafe {Self { neon: vmulq_u32(self.neon, rhs.neon) }}
122 } else {
123 let arr1: [u32; 4] = cast(self);
124 let arr2: [u32; 4] = cast(rhs);
125 cast([
126 arr1[0].wrapping_mul(arr2[0]),
127 arr1[1].wrapping_mul(arr2[1]),
128 arr1[2].wrapping_mul(arr2[2]),
129 arr1[3].wrapping_mul(arr2[3]),
130 ])
131 }
132 }
133 }
134}
135
136impl Add<u32> for u32x4 {
137 type Output = Self;
138 #[inline]
139 fn add(self, rhs: u32) -> Self::Output {
140 self.add(Self::splat(rhs))
141 }
142}
143
144impl Sub<u32> for u32x4 {
145 type Output = Self;
146 #[inline]
147 fn sub(self, rhs: u32) -> Self::Output {
148 self.sub(Self::splat(rhs))
149 }
150}
151
152impl Mul<u32> for u32x4 {
153 type Output = Self;
154 #[inline]
155 fn mul(self, rhs: u32) -> Self::Output {
156 self.mul(Self::splat(rhs))
157 }
158}
159
160impl Add<u32x4> for u32 {
161 type Output = u32x4;
162 #[inline]
163 fn add(self, rhs: u32x4) -> Self::Output {
164 u32x4::splat(self).add(rhs)
165 }
166}
167
168impl Sub<u32x4> for u32 {
169 type Output = u32x4;
170 #[inline]
171 fn sub(self, rhs: u32x4) -> Self::Output {
172 u32x4::splat(self).sub(rhs)
173 }
174}
175
176impl Mul<u32x4> for u32 {
177 type Output = u32x4;
178 #[inline]
179 fn mul(self, rhs: u32x4) -> Self::Output {
180 u32x4::splat(self).mul(rhs)
181 }
182}
183
184impl BitAnd for u32x4 {
185 type Output = Self;
186 #[inline]
187 fn bitand(self, rhs: Self) -> Self::Output {
188 pick! {
189 if #[cfg(target_feature="sse2")] {
190 Self { sse: bitand_m128i(self.sse, rhs.sse) }
191 } else if #[cfg(target_feature="simd128")] {
192 Self { simd: v128_and(self.simd, rhs.simd) }
193 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
194 unsafe {Self { neon: vandq_u32(self.neon, rhs.neon) }}
195 } else {
196 Self { arr: [
197 self.arr[0].bitand(rhs.arr[0]),
198 self.arr[1].bitand(rhs.arr[1]),
199 self.arr[2].bitand(rhs.arr[2]),
200 self.arr[3].bitand(rhs.arr[3]),
201 ]}
202 }
203 }
204 }
205}
206
207impl BitOr for u32x4 {
208 type Output = Self;
209 #[inline]
210 fn bitor(self, rhs: Self) -> Self::Output {
211 pick! {
212 if #[cfg(target_feature="sse2")] {
213 Self { sse: bitor_m128i(self.sse, rhs.sse) }
214 } else if #[cfg(target_feature="simd128")] {
215 Self { simd: v128_or(self.simd, rhs.simd) }
216 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
217 unsafe {Self { neon: vorrq_u32(self.neon, rhs.neon) }}
218 } else {
219 Self { arr: [
220 self.arr[0].bitor(rhs.arr[0]),
221 self.arr[1].bitor(rhs.arr[1]),
222 self.arr[2].bitor(rhs.arr[2]),
223 self.arr[3].bitor(rhs.arr[3]),
224 ]}
225 }
226 }
227 }
228}
229
230impl BitXor for u32x4 {
231 type Output = Self;
232 #[inline]
233 fn bitxor(self, rhs: Self) -> Self::Output {
234 pick! {
235 if #[cfg(target_feature="sse2")] {
236 Self { sse: bitxor_m128i(self.sse, rhs.sse) }
237 } else if #[cfg(target_feature="simd128")] {
238 Self { simd: v128_xor(self.simd, rhs.simd) }
239 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
240 unsafe {Self { neon: veorq_u32(self.neon, rhs.neon) }}
241 } else {
242 Self { arr: [
243 self.arr[0].bitxor(rhs.arr[0]),
244 self.arr[1].bitxor(rhs.arr[1]),
245 self.arr[2].bitxor(rhs.arr[2]),
246 self.arr[3].bitxor(rhs.arr[3]),
247 ]}
248 }
249 }
250 }
251}
252
253macro_rules! impl_shl_t_for_u32x4 {
254 ($($shift_type:ty),+ $(,)?) => {
255 $(impl Shl<$shift_type> for u32x4 {
256 type Output = Self;
257 #[inline]
259 fn shl(self, rhs: $shift_type) -> Self::Output {
260 pick! {
261 if #[cfg(target_feature="sse2")] {
262 let shift = cast([rhs as u64, 0]);
263 Self { sse: shl_all_u32_m128i(self.sse, shift) }
264 } else if #[cfg(target_feature="simd128")] {
265 Self { simd: u32x4_shl(self.simd, rhs as u32) }
266 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
267 unsafe {Self { neon: vshlq_u32(self.neon, vmovq_n_s32(rhs as i32)) }}
268 } else {
269 let u = rhs as u32;
270 Self { arr: [
271 self.arr[0].wrapping_shl(u),
272 self.arr[1].wrapping_shl(u),
273 self.arr[2].wrapping_shl(u),
274 self.arr[3].wrapping_shl(u),
275 ]}
276 }
277 }
278 }
279 })+
280 };
281}
282impl_shl_t_for_u32x4!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);
283
284macro_rules! impl_shr_t_for_u32x4 {
285 ($($shift_type:ty),+ $(,)?) => {
286 $(impl Shr<$shift_type> for u32x4 {
287 type Output = Self;
288 #[inline]
290 fn shr(self, rhs: $shift_type) -> Self::Output {
291 pick! {
292 if #[cfg(target_feature="sse2")] {
293 let shift = cast([rhs as u64, 0]);
294 Self { sse: shr_all_u32_m128i(self.sse, shift) }
295 } else if #[cfg(target_feature="simd128")] {
296 Self { simd: u32x4_shr(self.simd, rhs as u32) }
297 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
298 unsafe {Self { neon: vshlq_u32(self.neon, vmovq_n_s32( -(rhs as i32))) }}
299 } else {
300 let u = rhs as u32;
301 Self { arr: [
302 self.arr[0].wrapping_shr(u),
303 self.arr[1].wrapping_shr(u),
304 self.arr[2].wrapping_shr(u),
305 self.arr[3].wrapping_shr(u),
306 ]}
307 }
308 }
309 }
310 })+
311 };
312}
313impl_shr_t_for_u32x4!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);
314
315impl Shr<u32x4> for u32x4 {
321 type Output = Self;
322 #[inline]
323 fn shr(self, rhs: u32x4) -> Self::Output {
324 pick! {
325 if #[cfg(target_feature="avx2")] {
326 let shift_by = bitand_m128i(rhs.sse, set_splat_i32_m128i(31));
328 Self { sse: shr_each_u32_m128i(self.sse, shift_by) }
329 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
330 unsafe {
331 let shift_by = vnegq_s32(vreinterpretq_s32_u32(vandq_u32(rhs.neon, vmovq_n_u32(31))));
334 Self { neon: vshlq_u32(self.neon, shift_by) }
335 }
336 } else {
337 let arr: [u32; 4] = cast(self);
338 let rhs: [u32; 4] = cast(rhs);
339 cast([
340 arr[0].wrapping_shr(rhs[0]),
341 arr[1].wrapping_shr(rhs[1]),
342 arr[2].wrapping_shr(rhs[2]),
343 arr[3].wrapping_shr(rhs[3]),
344 ])
345 }
346 }
347 }
348}
349
350impl Shl<u32x4> for u32x4 {
356 type Output = Self;
357 #[inline]
358 fn shl(self, rhs: u32x4) -> Self::Output {
359 pick! {
360 if #[cfg(target_feature="avx2")] {
361 let shift_by = bitand_m128i(rhs.sse, set_splat_i32_m128i(31));
363 Self { sse: shl_each_u32_m128i(self.sse, shift_by) }
364 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
365 unsafe {
366 let shift_by = vreinterpretq_s32_u32(vandq_u32(rhs.neon, vmovq_n_u32(31)));
368 Self { neon: vshlq_u32(self.neon, shift_by) }
369 }
370 } else {
371 let arr: [u32; 4] = cast(self);
372 let rhs: [u32; 4] = cast(rhs);
373 cast([
374 arr[0].wrapping_shl(rhs[0]),
375 arr[1].wrapping_shl(rhs[1]),
376 arr[2].wrapping_shl(rhs[2]),
377 arr[3].wrapping_shl(rhs[3]),
378 ])
379 }
380 }
381 }
382}
383
// Trait form of the lane-wise equality comparison.
impl CmpEq for u32x4 {
  type Output = Self;
  #[inline]
  fn simd_eq(self, rhs: Self) -> Self::Output {
    // `Self::simd_eq` resolves to the inherent `u32x4::simd_eq` defined in
    // `impl u32x4` (inherent items take precedence over trait items), so this
    // forwards to that implementation rather than recursing.
    Self::simd_eq(self, rhs)
  }
}
391
// Trait form of the lane-wise unsigned greater-than comparison.
impl CmpGt for u32x4 {
  type Output = Self;
  #[inline]
  fn simd_gt(self, rhs: Self) -> Self::Output {
    // `Self::simd_gt` resolves to the inherent `u32x4::simd_gt` defined in
    // `impl u32x4` (inherent items take precedence over trait items), so this
    // forwards to that implementation rather than recursing.
    Self::simd_gt(self, rhs)
  }
}
399
// Trait form of the lane-wise unsigned less-than comparison.
impl CmpLt for u32x4 {
  type Output = Self;
  #[inline]
  fn simd_lt(self, rhs: Self) -> Self::Output {
    // `a < b` is computed as `b > a`: the inherent `simd_gt` is called with
    // the operands swapped.
    Self::simd_gt(rhs, self)
  }
}
408
409impl u32x4 {
410 #[inline]
411 #[must_use]
412 pub const fn new(array: [u32; 4]) -> Self {
413 unsafe { core::mem::transmute(array) }
414 }
415 #[inline]
416 #[must_use]
417 pub fn simd_eq(self, rhs: Self) -> Self {
418 pick! {
419 if #[cfg(target_feature="sse2")] {
420 Self { sse: cmp_eq_mask_i32_m128i(self.sse, rhs.sse) }
421 } else if #[cfg(target_feature="simd128")] {
422 Self { simd: u32x4_eq(self.simd, rhs.simd) }
423 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
424 unsafe {Self { neon: vceqq_u32(self.neon, rhs.neon) }}
425 } else {
426 Self { arr: [
427 if self.arr[0] == rhs.arr[0] { u32::MAX } else { 0 },
428 if self.arr[1] == rhs.arr[1] { u32::MAX } else { 0 },
429 if self.arr[2] == rhs.arr[2] { u32::MAX } else { 0 },
430 if self.arr[3] == rhs.arr[3] { u32::MAX } else { 0 },
431 ]}
432 }
433 }
434 }
435 #[inline]
436 #[must_use]
437 pub fn simd_gt(self, rhs: Self) -> Self {
438 pick! {
439 if #[cfg(target_feature="sse2")] {
440 let h = u32x4::splat(1 << 31);
442 Self { sse: cmp_gt_mask_i32_m128i((self ^ h).sse, (rhs ^ h).sse) }
443 } else if #[cfg(target_feature="simd128")] {
444 Self { simd: u32x4_gt(self.simd, rhs.simd) }
445 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))] {
446 unsafe {Self { neon: vcgtq_u32(self.neon, rhs.neon) }}
447 } else {
448 Self { arr: [
449 if self.arr[0] > rhs.arr[0] { u32::MAX } else { 0 },
450 if self.arr[1] > rhs.arr[1] { u32::MAX } else { 0 },
451 if self.arr[2] > rhs.arr[2] { u32::MAX } else { 0 },
452 if self.arr[3] > rhs.arr[3] { u32::MAX } else { 0 },
453 ]}
454 }
455 }
456 }
457 #[inline]
458 #[must_use]
459 pub fn simd_lt(self, rhs: Self) -> Self {
460 rhs.simd_gt(self)
462 }
463
464 #[inline]
468 #[must_use]
469 pub fn mul_keep_high(self, rhs: Self) -> Self {
470 pick! {
471 if #[cfg(target_feature="avx2")] {
472 let a = convert_to_i64_m256i_from_u32_m128i(self.sse);
473 let b = convert_to_i64_m256i_from_u32_m128i(rhs.sse);
474 let r = mul_u64_low_bits_m256i(a, b);
475
476 let b : [u32;8] = cast(r);
478 cast([b[1],b[3],b[5],b[7]])
479 } else if #[cfg(target_feature="sse2")] {
480 let evenp = mul_widen_u32_odd_m128i(self.sse, rhs.sse);
481
482 let oddp = mul_widen_u32_odd_m128i(
483 shr_imm_u64_m128i::<32>(self.sse),
484 shr_imm_u64_m128i::<32>(rhs.sse));
485
486 let a : [u32;4]= cast(evenp);
488 let b : [u32;4]= cast(oddp);
489 cast([a[1],b[1],a[3],b[3]])
490
491 } else if #[cfg(target_feature="simd128")] {
492 let low = u64x2_extmul_low_u32x4(self.simd, rhs.simd);
493 let high = u64x2_extmul_high_u32x4(self.simd, rhs.simd);
494
495 Self { simd: u32x4_shuffle::<1, 3, 5, 7>(low, high) }
496 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))] {
497 unsafe {
498 let l = vmull_u32(vget_low_u32(self.neon), vget_low_u32(rhs.neon));
499 let h = vmull_u32(vget_high_u32(self.neon), vget_high_u32(rhs.neon));
500 u32x4 { neon: vcombine_u32(vshrn_n_u64(l,32), vshrn_n_u64(h,32)) }
501 }
502 } else {
503 let a: [u32; 4] = cast(self);
504 let b: [u32; 4] = cast(rhs);
505 cast([
506 ((u64::from(a[0]) * u64::from(b[0])) >> 32) as u32,
507 ((u64::from(a[1]) * u64::from(b[1])) >> 32) as u32,
508 ((u64::from(a[2]) * u64::from(b[2])) >> 32) as u32,
509 ((u64::from(a[3]) * u64::from(b[3])) >> 32) as u32,
510 ])
511 }
512 }
513 }
514
515 #[inline]
521 #[must_use]
522 pub fn mul_widen(self, rhs: Self) -> u64x4 {
523 pick! {
524 if #[cfg(target_feature="avx2")] {
525 let a = convert_to_i64_m256i_from_i32_m128i(self.sse);
527 let b = convert_to_i64_m256i_from_i32_m128i(rhs.sse);
528 cast(mul_u64_low_bits_m256i(a, b))
529 } else if #[cfg(target_feature="sse2")] {
530 let evenp = mul_widen_u32_odd_m128i(self.sse, rhs.sse);
531
532 let oddp = mul_widen_u32_odd_m128i(
533 shr_imm_u64_m128i::<32>(self.sse),
534 shr_imm_u64_m128i::<32>(rhs.sse));
535
536 u64x4 {
537 a: u64x2 { sse: unpack_low_i64_m128i(evenp, oddp)},
538 b: u64x2 { sse: unpack_high_i64_m128i(evenp, oddp)}
539 }
540 } else if #[cfg(target_feature="simd128")] {
541 u64x4 {
542 a: u64x2 { simd: u64x2_extmul_low_u32x4(self.simd, rhs.simd) },
543 b: u64x2 { simd: u64x2_extmul_high_u32x4(self.simd, rhs.simd) },
544 }
545 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))] {
546 unsafe {
547 u64x4 { a: u64x2 { neon: vmull_u32(vget_low_u32(self.neon), vget_low_u32(rhs.neon)) },
548 b: u64x2 { neon: vmull_u32(vget_high_u32(self.neon), vget_high_u32(rhs.neon)) } }
549 }
550 } else {
551 let a: [u32; 4] = cast(self);
552 let b: [u32; 4] = cast(rhs);
553 cast([
554 u64::from(a[0]) * u64::from(b[0]),
555 u64::from(a[1]) * u64::from(b[1]),
556 u64::from(a[2]) * u64::from(b[2]),
557 u64::from(a[3]) * u64::from(b[3]),
558 ])
559 }
560 }
561 }
562
563 #[inline]
564 #[must_use]
565 pub fn blend(self, t: Self, f: Self) -> Self {
566 pick! {
567 if #[cfg(target_feature="sse4.1")] {
568 Self { sse: blend_varying_i8_m128i(f.sse, t.sse, self.sse) }
569 } else if #[cfg(target_feature="simd128")] {
570 Self { simd: v128_bitselect(t.simd, f.simd, self.simd) }
571 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
572 unsafe {Self { neon: vbslq_u32(self.neon, t.neon, f.neon) }}
573 } else {
574 generic_bit_blend(self, t, f)
575 }
576 }
577 }
578 #[inline]
579 #[must_use]
580 pub fn max(self, rhs: Self) -> Self {
581 pick! {
582 if #[cfg(target_feature="sse4.1")] {
583 Self { sse: max_u32_m128i(self.sse, rhs.sse) }
584 } else if #[cfg(target_feature="simd128")] {
585 Self { simd: u32x4_max(self.simd, rhs.simd) }
586 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
587 unsafe {Self { neon: vmaxq_u32(self.neon, rhs.neon) }}
588 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
589 unsafe {Self { neon: vmaxq_u16(self.neon, rhs.neon) }}
590 } else {
591 let arr: [u32; 4] = cast(self);
592 let rhs: [u32; 4] = cast(rhs);
593 cast([
594 arr[0].max(rhs[0]),
595 arr[1].max(rhs[1]),
596 arr[2].max(rhs[2]),
597 arr[3].max(rhs[3]),
598 ])
599 }
600 }
601 }
602 #[inline]
603 #[must_use]
604 pub fn min(self, rhs: Self) -> Self {
605 pick! {
606 if #[cfg(target_feature="sse4.1")] {
607 Self { sse: min_u32_m128i(self.sse, rhs.sse) }
608 } else if #[cfg(target_feature="simd128")] {
609 Self { simd: u32x4_min(self.simd, rhs.simd) }
610 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
611 unsafe {Self { neon: vminq_u32(self.neon, rhs.neon) }}
612 } else {
613 let arr: [u32; 4] = cast(self);
614 let rhs: [u32; 4] = cast(rhs);
615 cast([
616 arr[0].min(rhs[0]),
617 arr[1].min(rhs[1]),
618 arr[2].min(rhs[2]),
619 arr[3].min(rhs[3]),
620 ])
621 }
622 }
623 }
624
625 #[inline]
626 #[must_use]
627 pub fn any(self) -> bool {
628 pick! {
629 if #[cfg(target_feature="sse2")] {
630 (move_mask_i8_m128i(self.sse) & 0b1000100010001000) != 0
631 } else if #[cfg(target_feature="simd128")] {
632 u32x4_bitmask(self.simd) != 0
633 } else {
634 let v : [u64;2] = cast(self);
635 ((v[0] | v[1]) & 0x8000000080000000) != 0
636 }
637 }
638 }
639
640 #[inline]
641 #[must_use]
642 pub fn all(self) -> bool {
643 pick! {
644 if #[cfg(target_feature="sse2")] {
645 (move_mask_i8_m128i(self.sse) & 0b1000100010001000) == 0b1000100010001000
646 } else if #[cfg(target_feature="simd128")] {
647 u32x4_bitmask(self.simd) == 0b1111
648 } else {
649 let v : [u64;2] = cast(self);
650 (v[0] & v[1] & 0x8000000080000000) == 0x8000000080000000
651 }
652 }
653 }
654
655 #[inline]
656 #[must_use]
657 pub fn none(self) -> bool {
658 !self.any()
659 }
660
661 #[must_use]
663 #[inline]
664 pub fn transpose(data: [u32x4; 4]) -> [u32x4; 4] {
665 pick! {
666 if #[cfg(target_feature="sse")] {
667 let mut e0 = data[0];
668 let mut e1 = data[1];
669 let mut e2 = data[2];
670 let mut e3 = data[3];
671
672 transpose_four_m128(
673 cast_mut(&mut e0.sse),
674 cast_mut(&mut e1.sse),
675 cast_mut(&mut e2.sse),
676 cast_mut(&mut e3.sse),
677 );
678
679 [e0, e1, e2, e3]
680 } else {
681 #[inline(always)]
682 fn transpose_column(data: &[u32x4; 4], index: usize) -> u32x4 {
683 u32x4::new([
684 data[0].as_array()[index],
685 data[1].as_array()[index],
686 data[2].as_array()[index],
687 data[3].as_array()[index],
688 ])
689 }
690
691 [
692 transpose_column(&data, 0),
693 transpose_column(&data, 1),
694 transpose_column(&data, 2),
695 transpose_column(&data, 3),
696 ]
697 }
698 }
699 }
700
701 #[inline]
702 #[must_use]
703 pub fn to_bitmask(self) -> u32 {
704 i32x4::to_bitmask(cast(self))
705 }
706
707 #[inline]
708 pub fn to_array(self) -> [u32; 4] {
709 cast(self)
710 }
711
712 #[inline]
713 pub fn as_array(&self) -> &[u32; 4] {
714 cast_ref(self)
715 }
716
717 #[inline]
718 pub fn as_mut_array(&mut self) -> &mut [u32; 4] {
719 cast_mut(self)
720 }
721}