1use super::*;
2
3pick! {
4 if #[cfg(target_feature="avx2")] {
5 #[derive(Default, Clone, Copy, PartialEq, Eq)]
6 #[repr(C, align(32))]
7 pub struct u32x8 { pub(crate) avx2: m256i }
8 } else {
9 #[derive(Default, Clone, Copy, PartialEq, Eq)]
10 #[repr(C, align(32))]
11 pub struct u32x8 { pub(crate) a : u32x4, pub(crate) b : u32x4 }
12 }
13}
14
// Invokes the shared constants macro for this type. The arguments appear to be
// the element type, lane count, vector type and bit width. Presumably this is
// what supplies `Self::ZERO`, `Self::MIN` and `Self::MAX`, which are used later
// in this file — confirm against the macro definition.
int_uint_consts!(u32, 8, u32x8, 256);
16
// SAFETY (reviewer note): these marker impls promise that every bit pattern,
// including all zeros, is a valid `u32x8`. That presumably holds because both
// layouts are plain integer vectors (`m256i`, or two `u32x4`) — confirm that
// those field types are themselves `Pod` and that the struct has no padding.
unsafe impl Zeroable for u32x8 {}
unsafe impl Pod for u32x8 {}
19
// Declares `u32` as the scalar element type for the `AlignTo` trait. The trait
// itself is defined elsewhere in the crate.
impl AlignTo for u32x8 {
  type Elem = u32;
}
23
24impl Add for u32x8 {
25 type Output = Self;
26 #[inline]
27 fn add(self, rhs: Self) -> Self::Output {
28 pick! {
29 if #[cfg(target_feature="avx2")] {
30 Self { avx2: add_i32_m256i(self.avx2, rhs.avx2) }
31 } else {
32 Self {
33 a : self.a.add(rhs.a),
34 b : self.b.add(rhs.b),
35 }
36 }
37 }
38 }
39}
40
41impl Sub for u32x8 {
42 type Output = Self;
43 #[inline]
44 fn sub(self, rhs: Self) -> Self::Output {
45 pick! {
46 if #[cfg(target_feature="avx2")] {
47 Self { avx2: sub_i32_m256i(self.avx2, rhs.avx2) }
48 } else {
49 Self {
50 a : self.a.sub(rhs.a),
51 b : self.b.sub(rhs.b),
52 }
53 }
54 }
55 }
56}
57
58impl Add<u32> for u32x8 {
59 type Output = Self;
60 #[inline]
70 fn add(self, rhs: u32) -> Self::Output {
71 self + Self::splat(rhs)
72 }
73}
74
75impl Sub<u32> for u32x8 {
76 type Output = Self;
77 #[inline]
87 fn sub(self, rhs: u32) -> Self::Output {
88 self - Self::splat(rhs)
89 }
90}
91
92impl Mul<u32> for u32x8 {
93 type Output = Self;
94 #[inline]
104 fn mul(self, rhs: u32) -> Self::Output {
105 self * Self::splat(rhs)
106 }
107}
108
109impl Mul for u32x8 {
110 type Output = Self;
111 #[inline]
112 fn mul(self, rhs: Self) -> Self::Output {
113 pick! {
114 if #[cfg(target_feature="avx2")] {
115 Self { avx2: mul_i32_keep_low_m256i(self.avx2, rhs.avx2) }
116 } else {
117 Self {
118 a : self.a.mul(rhs.a),
119 b : self.b.mul(rhs.b),
120 }
121 }
122 }
123 }
124}
125
// Generates the division/remainder support for `u32x8` via the crate's shared
// macro. The index list names all eight lanes; presumably the operations are
// carried out lane by lane — confirm against the macro definition.
integer_impl_div_rem!(u32, u32x8, [0, 1, 2, 3, 4, 5, 6, 7]);
127
128impl BitAnd for u32x8 {
129 type Output = Self;
130 #[inline]
131 fn bitand(self, rhs: Self) -> Self::Output {
132 pick! {
133 if #[cfg(target_feature="avx2")] {
134 Self { avx2: bitand_m256i(self.avx2, rhs.avx2) }
135 } else {
136 Self {
137 a : self.a.bitand(rhs.a),
138 b : self.b.bitand(rhs.b),
139 }
140 }
141 }
142 }
143}
144
145impl BitOr for u32x8 {
146 type Output = Self;
147 #[inline]
148 fn bitor(self, rhs: Self) -> Self::Output {
149 pick! {
150 if #[cfg(target_feature="avx2")] {
151 Self { avx2: bitor_m256i(self.avx2, rhs.avx2) }
152 } else {
153 Self {
154 a : self.a.bitor(rhs.a),
155 b : self.b.bitor(rhs.b),
156 }
157 }
158 }
159 }
160}
161
162impl BitXor for u32x8 {
163 type Output = Self;
164 #[inline]
165 fn bitxor(self, rhs: Self) -> Self::Output {
166 pick! {
167 if #[cfg(target_feature="avx2")] {
168 Self { avx2: bitxor_m256i(self.avx2, rhs.avx2) }
169 } else {
170 Self {
171 a : self.a.bitxor(rhs.a),
172 b : self.b.bitxor(rhs.b),
173 }
174 }
175 }
176 }
177}
178
179impl Add<u32x8> for u32 {
180 type Output = u32x8;
181
182 #[inline]
183 fn add(self, rhs: u32x8) -> Self::Output {
184 u32x8::splat(self) + rhs
185 }
186}
187
188impl Sub<u32x8> for u32 {
189 type Output = u32x8;
190
191 #[inline]
192 fn sub(self, rhs: u32x8) -> Self::Output {
193 u32x8::splat(self) - rhs
194 }
195}
196
197impl From<u16x8> for u32x8 {
198 #[inline]
200 fn from(v: u16x8) -> Self {
201 pick! {
202 if #[cfg(target_feature="avx2")] {
203 Self { avx2:convert_to_i32_m256i_from_u16_m128i(v.sse) }
204 } else if #[cfg(target_feature="sse2")] {
205 Self {
206 a: u32x4 { sse: shr_imm_u32_m128i::<16>( unpack_low_i16_m128i(v.sse, v.sse)) },
207 b: u32x4 { sse: shr_imm_u32_m128i::<16>( unpack_high_i16_m128i(v.sse, v.sse)) },
208 }
209 } else {
210 u32x8::new([
211 u32::from(v.as_array()[0]),
212 u32::from(v.as_array()[1]),
213 u32::from(v.as_array()[2]),
214 u32::from(v.as_array()[3]),
215 u32::from(v.as_array()[4]),
216 u32::from(v.as_array()[5]),
217 u32::from(v.as_array()[6]),
218 u32::from(v.as_array()[7]),
219 ])
220 }
221 }
222 }
223}
224
225macro_rules! impl_shl_t_for_u32x8 {
226 ($($shift_type:ty),+ $(,)?) => {
227 $(impl Shl<$shift_type> for u32x8 {
228 type Output = Self;
229 #[inline]
231 fn shl(self, rhs: $shift_type) -> Self::Output {
232 pick! {
233 if #[cfg(target_feature="avx2")] {
234 let shift = cast([rhs as u64, 0]);
235 Self { avx2: shl_all_u32_m256i(self.avx2, shift) }
236 } else {
237 Self {
238 a : self.a.shl(rhs),
239 b : self.b.shl(rhs),
240 }
241 }
242 }
243 }
244 })+
245 };
246}
247impl_shl_t_for_u32x8!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);
248
249macro_rules! impl_shr_t_for_u32x8 {
250 ($($shift_type:ty),+ $(,)?) => {
251 $(impl Shr<$shift_type> for u32x8 {
252 type Output = Self;
253 #[inline]
255 fn shr(self, rhs: $shift_type) -> Self::Output {
256 pick! {
257 if #[cfg(target_feature="avx2")] {
258 let shift = cast([rhs as u64, 0]);
259 Self { avx2: shr_all_u32_m256i(self.avx2, shift) }
260 } else {
261 Self {
262 a : self.a.shr(rhs),
263 b : self.b.shr(rhs),
264 }
265 }
266 }
267 }
268 })+
269 };
270}
271
272impl_shr_t_for_u32x8!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);
273
274impl Shr<u32x8> for u32x8 {
280 type Output = Self;
281
282 #[inline]
283 fn shr(self, rhs: u32x8) -> Self::Output {
284 pick! {
285 if #[cfg(target_feature="avx2")] {
286 let shift_by = bitand_m256i(rhs.avx2, set_splat_i32_m256i(31));
288 Self { avx2: shr_each_u32_m256i(self.avx2, shift_by ) }
289 } else {
290 Self {
291 a : self.a.shr(rhs.a),
292 b : self.b.shr(rhs.b),
293 }
294 }
295 }
296 }
297}
298
299impl Shl<u32x8> for u32x8 {
305 type Output = Self;
306
307 #[inline]
308 fn shl(self, rhs: u32x8) -> Self::Output {
309 pick! {
310 if #[cfg(target_feature="avx2")] {
311 let shift_by = bitand_m256i(rhs.avx2, set_splat_i32_m256i(31));
313 Self { avx2: shl_each_u32_m256i(self.avx2, shift_by) }
314 } else {
315 Self {
316 a : self.a.shl(rhs.a),
317 b : self.b.shl(rhs.b),
318 }
319 }
320 }
321 }
322}
323
324#[expect(deprecated)]
325impl CmpEq for u32x8 {
326 type Output = Self;
327 #[inline]
342 fn simd_eq(self, rhs: Self) -> Self::Output {
343 pick! {
344 if #[cfg(target_feature="avx2")] {
345 Self { avx2: cmp_eq_mask_i32_m256i(self.avx2, rhs.avx2 ) }
346 } else {
347 Self {
348 a : self.a.simd_eq(rhs.a),
349 b : self.b.simd_eq(rhs.b),
350 }
351 }
352 }
353 }
354}
355
356#[expect(deprecated)]
357impl CmpGt for u32x8 {
358 type Output = Self;
359 #[inline]
376 fn simd_gt(self, rhs: Self) -> Self::Output {
377 pick! {
378 if #[cfg(target_feature="avx2")] {
379 let highbit = u32x8::splat(1 << 31);
381 Self { avx2: cmp_gt_mask_i32_m256i((self ^ highbit).avx2, (rhs ^ highbit).avx2 ) }
382 } else {
383 Self {
384 a : self.a.simd_gt(rhs.a),
385 b : self.b.simd_gt(rhs.b),
386 }
387 }
388 }
389 }
390}
391
392#[expect(deprecated)]
393impl CmpLt for u32x8 {
394 type Output = Self;
395 #[inline]
412 fn simd_lt(self, rhs: Self) -> Self::Output {
413 rhs.simd_gt(self)
415 }
416}
417
418#[expect(deprecated)]
419impl CmpNe for u32x8 {
420 type Output = Self;
421 #[inline]
436 fn simd_ne(self, rhs: Self) -> Self::Output {
437 !self.simd_eq(rhs)
438 }
439}
440
441#[expect(deprecated)]
442impl CmpGe for u32x8 {
443 type Output = Self;
444 #[inline]
463 fn simd_ge(self, rhs: Self) -> Self::Output {
464 self.simd_eq(rhs) | self.simd_gt(rhs)
465 }
466}
467
468#[expect(deprecated)]
469impl CmpLe for u32x8 {
470 type Output = Self;
471 #[inline]
490 fn simd_le(self, rhs: Self) -> Self::Output {
491 self.simd_eq(rhs) | self.simd_lt(rhs)
492 }
493}
494
495impl u32x8 {
496 #[inline]
497 #[must_use]
498 pub const fn new(array: [u32; 8]) -> Self {
499 unsafe { core::mem::transmute(array) }
500 }
501
502 simd_comparison_fns!();
503
504 #[inline]
508 #[must_use]
509 pub fn mul_keep_high(self, rhs: u32x8) -> u32x8 {
510 pick! {
511 if #[cfg(target_feature="avx2")] {
512 let a : [u32;8]= cast(self);
513 let b : [u32;8]= cast(rhs);
514
515 let r1 : [u32;8] = cast(mul_u64_low_bits_m256i(cast([a[0], 0, a[1], 0, a[2], 0, a[3], 0]), cast([b[0], 0, b[1], 0, b[2], 0, b[3], 0])));
517 let r2 : [u32;8] = cast(mul_u64_low_bits_m256i(cast([a[4], 0, a[5], 0, a[6], 0, a[7], 0]), cast([b[4], 0, b[5], 0, b[6], 0, b[7], 0])));
518
519 cast([r1[1], r1[3], r1[5], r1[7], r2[1], r2[3], r2[5], r2[7]])
520 } else {
521 Self {
522 a : self.a.mul_keep_high(rhs.a),
523 b : self.b.mul_keep_high(rhs.b),
524 }
525 }
526 }
527 }
528
529 #[inline]
530 #[must_use]
531 pub fn blend(self, t: Self, f: Self) -> Self {
532 pick! {
533 if #[cfg(target_feature="avx2")] {
534 Self { avx2: blend_varying_i8_m256i(f.avx2, t.avx2, self.avx2) }
535 } else {
536 Self {
537 a : self.a.blend(t.a, f.a),
538 b : self.b.blend(t.b, f.b),
539 }
540 }
541 }
542 }
543
544 #[inline]
545 #[must_use]
546 pub fn reduce_add(self) -> u32 {
547 cast(i32x8::reduce_add(cast(self)))
548 }
549
550 #[inline]
551 #[must_use]
552 pub fn reduce_max(self) -> u32 {
553 let array: [u32x4; 2] = cast(self);
554 array[0].max(array[1]).reduce_max()
555 }
556
557 #[inline]
558 #[must_use]
559 pub fn reduce_min(self) -> u32 {
560 let array: [u32x4; 2] = cast(self);
561 array[0].min(array[1]).reduce_min()
562 }
563
564 #[inline]
565 #[must_use]
566 pub fn max(self, rhs: Self) -> Self {
567 pick! {
568 if #[cfg(target_feature="avx2")] {
569 Self { avx2: max_u32_m256i(self.avx2, rhs.avx2 ) }
570 } else {
571 Self {
572 a : self.a.max(rhs.a),
573 b : self.b.max(rhs.b),
574 }
575 }
576 }
577 }
578 #[inline]
579 #[must_use]
580 pub fn min(self, rhs: Self) -> Self {
581 pick! {
582 if #[cfg(target_feature="avx2")] {
583 Self { avx2: min_u32_m256i(self.avx2, rhs.avx2 ) }
584 } else {
585 Self {
586 a : self.a.min(rhs.a),
587 b : self.b.min(rhs.b),
588 }
589 }
590 }
591 }
592
593 integer_fn_clamp!();
594
595 #[inline]
596 #[must_use]
597 pub fn saturating_add(self, rhs: Self) -> Self {
598 pick! {
599 if #[cfg(target_feature="avx2")] {
600 let result = self + rhs;
601 result.simd_lt(self).blend(Self::MAX, result)
602 } else {
603 Self {
604 a: self.a.saturating_add(rhs.a),
605 b: self.b.saturating_add(rhs.b),
606 }
607 }
608 }
609 }
610
611 #[inline]
612 #[must_use]
613 pub fn saturating_sub(self, rhs: Self) -> Self {
614 pick! {
615 if #[cfg(target_feature="avx2")] {
616 let result = self - rhs;
617 result.simd_gt(self).blend(Self::MIN, result)
618 } else {
619 Self {
620 a: self.a.saturating_sub(rhs.a),
621 b: self.b.saturating_sub(rhs.b),
622 }
623 }
624 }
625 }
626
627 #[inline]
629 #[must_use]
630 pub fn saturating_mul(self, rhs: Self) -> Self {
631 pick! {
632 if #[cfg(target_feature="avx2")] {
633 let even_wide_mul = mul_u64_low_bits_m256i(self.avx2, rhs.avx2);
634 let odd_wide_mul = mul_u64_low_bits_m256i(
635 shuffle_ai_i32_half_m256i::<0b_00_11_00_01>(self.avx2),
636 shuffle_ai_i32_half_m256i::<0b_00_11_00_01>(rhs.avx2),
637 );
638
639 let ll_hh_1 = unpack_low_i32_m256i(even_wide_mul, odd_wide_mul);
640 let ll_hh_2 = unpack_high_i32_m256i(even_wide_mul, odd_wide_mul);
641 let low = Self { avx2: unpack_low_i64_m256i(ll_hh_1, ll_hh_2) };
642 let high = Self { avx2: unpack_high_i64_m256i(ll_hh_1, ll_hh_2) };
643
644 let no_overflow = high.simd_eq(Self::ZERO);
645 no_overflow.blend(low, Self::MAX)
646 } else {
647 let [self_a, self_b]: [u32x4; 2] = cast(self);
648 let [rhs_a, rhs_b]: [u32x4; 2] = cast(rhs);
649
650 cast([self_a.saturating_mul(rhs_a), self_b.saturating_mul(rhs_b)])
651 }
652 }
653 }
654
655 integer_fn_saturating_div!([0, 1, 2, 3, 4, 5, 6, 7]);
656
657 #[inline]
658 #[must_use]
659 #[doc(alias("movemask", "move_mask"))]
660 pub fn to_bitmask(self) -> u32 {
661 i32x8::to_bitmask(cast(self))
662 }
663
664 #[inline]
665 #[must_use]
666 pub fn any(self) -> bool {
667 pick! {
668 if #[cfg(target_feature="avx2")] {
669 ((move_mask_i8_m256i(self.avx2) as u32) & 0b10001000100010001000100010001000) != 0
670 } else {
671 (self.a | self.b).any()
672 }
673 }
674 }
675
676 #[inline]
677 #[must_use]
678 pub fn all(self) -> bool {
679 pick! {
680 if #[cfg(target_feature="avx2")] {
681 ((move_mask_i8_m256i(self.avx2) as u32) & 0b10001000100010001000100010001000) == 0b10001000100010001000100010001000
682 } else {
683 (self.a & self.b).all()
684 }
685 }
686 }
687
688 #[inline]
689 #[must_use]
690 pub fn none(self) -> bool {
691 !self.any()
692 }
693
694 #[must_use]
696 #[inline]
697 pub fn transpose(data: [u32x8; 8]) -> [u32x8; 8] {
698 cast(i32x8::transpose(cast(data)))
699 }
700
701 #[inline]
702 pub fn to_array(self) -> [u32; 8] {
703 cast(self)
704 }
705
706 #[inline]
707 pub fn as_array(&self) -> &[u32; 8] {
708 cast_ref(self)
709 }
710
711 #[inline]
712 pub fn as_mut_array(&mut self) -> &mut [u32; 8] {
713 cast_mut(self)
714 }
715}
716
717impl Not for u32x8 {
718 type Output = Self;
719 #[inline]
720 fn not(self) -> Self {
721 pick! {
722 if #[cfg(target_feature="avx2")] {
723 Self { avx2: self.avx2.not() }
724 } else {
725 Self {
726 a : self.a.not(),
727 b : self.b.not(),
728 }
729 }
730 }
731 }
732}