1use super::*;
2
3pick! {
4 if #[cfg(target_feature="sse2")] {
5 #[derive(Default, Clone, Copy, PartialEq, Eq)]
6 #[repr(C, align(16))]
7 pub struct i8x16 { pub(crate) sse: m128i }
8 } else if #[cfg(target_feature="simd128")] {
9 use core::arch::wasm32::*;
10
11 #[derive(Clone, Copy)]
12 #[repr(transparent)]
13 pub struct i8x16 { pub(crate) simd: v128 }
14
15 impl Default for i8x16 {
16 fn default() -> Self {
17 Self::splat(0)
18 }
19 }
20
21 impl PartialEq for i8x16 {
22 fn eq(&self, other: &Self) -> bool {
23 u8x16_all_true(i8x16_eq(self.simd, other.simd))
24 }
25 }
26
27 impl Eq for i8x16 { }
28 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
29 use core::arch::aarch64::*;
30 #[repr(C)]
31 #[derive(Copy, Clone)]
32 pub struct i8x16 { pub(crate) neon : int8x16_t }
33
34 impl Default for i8x16 {
35 #[inline]
36 fn default() -> Self {
37 Self::splat(0)
38 }
39 }
40
41 impl PartialEq for i8x16 {
42 #[inline]
43 fn eq(&self, other: &Self) -> bool {
44 unsafe { vminvq_u8(vceqq_s8(self.neon, other.neon))==u8::MAX }
45 }
46 }
47
48 impl Eq for i8x16 { }
49 } else {
50 #[derive(Default, Clone, Copy, PartialEq, Eq)]
51 #[repr(C, align(16))]
52 pub struct i8x16 { arr: [i8;16] }
53 }
54}
55
56int_uint_consts!(i8, 16, i8x16, 128);
57
58unsafe impl Zeroable for i8x16 {}
59unsafe impl Pod for i8x16 {}
60
61impl AlignTo for i8x16 {
62 type Elem = i8;
63}
64
65impl Add for i8x16 {
66 type Output = Self;
67 #[inline]
68 fn add(self, rhs: Self) -> Self::Output {
69 pick! {
70 if #[cfg(target_feature="sse2")] {
71 Self { sse: add_i8_m128i(self.sse, rhs.sse) }
72 } else if #[cfg(target_feature="simd128")] {
73 Self { simd: i8x16_add(self.simd, rhs.simd) }
74 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
75 unsafe { Self { neon: vaddq_s8(self.neon, rhs.neon) } }
76 } else {
77 Self { arr: [
78 self.arr[0].wrapping_add(rhs.arr[0]),
79 self.arr[1].wrapping_add(rhs.arr[1]),
80 self.arr[2].wrapping_add(rhs.arr[2]),
81 self.arr[3].wrapping_add(rhs.arr[3]),
82 self.arr[4].wrapping_add(rhs.arr[4]),
83 self.arr[5].wrapping_add(rhs.arr[5]),
84 self.arr[6].wrapping_add(rhs.arr[6]),
85 self.arr[7].wrapping_add(rhs.arr[7]),
86 self.arr[8].wrapping_add(rhs.arr[8]),
87 self.arr[9].wrapping_add(rhs.arr[9]),
88 self.arr[10].wrapping_add(rhs.arr[10]),
89 self.arr[11].wrapping_add(rhs.arr[11]),
90 self.arr[12].wrapping_add(rhs.arr[12]),
91 self.arr[13].wrapping_add(rhs.arr[13]),
92 self.arr[14].wrapping_add(rhs.arr[14]),
93 self.arr[15].wrapping_add(rhs.arr[15]),
94 ]}
95 }
96 }
97 }
98}
99
100impl Sub for i8x16 {
101 type Output = Self;
102 #[inline]
103 fn sub(self, rhs: Self) -> Self::Output {
104 pick! {
105 if #[cfg(target_feature="sse2")] {
106 Self { sse: sub_i8_m128i(self.sse, rhs.sse) }
107 } else if #[cfg(target_feature="simd128")] {
108 Self { simd: i8x16_sub(self.simd, rhs.simd) }
109 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
110 unsafe {Self { neon: vsubq_s8(self.neon, rhs.neon) }}
111 } else {
112 Self { arr: [
113 self.arr[0].wrapping_sub(rhs.arr[0]),
114 self.arr[1].wrapping_sub(rhs.arr[1]),
115 self.arr[2].wrapping_sub(rhs.arr[2]),
116 self.arr[3].wrapping_sub(rhs.arr[3]),
117 self.arr[4].wrapping_sub(rhs.arr[4]),
118 self.arr[5].wrapping_sub(rhs.arr[5]),
119 self.arr[6].wrapping_sub(rhs.arr[6]),
120 self.arr[7].wrapping_sub(rhs.arr[7]),
121 self.arr[8].wrapping_sub(rhs.arr[8]),
122 self.arr[9].wrapping_sub(rhs.arr[9]),
123 self.arr[10].wrapping_sub(rhs.arr[10]),
124 self.arr[11].wrapping_sub(rhs.arr[11]),
125 self.arr[12].wrapping_sub(rhs.arr[12]),
126 self.arr[13].wrapping_sub(rhs.arr[13]),
127 self.arr[14].wrapping_sub(rhs.arr[14]),
128 self.arr[15].wrapping_sub(rhs.arr[15]),
129 ]}
130 }
131 }
132 }
133}
134
135impl Mul for i8x16 {
136 type Output = Self;
137
138 #[inline]
139 fn mul(self, rhs: Self) -> Self::Output {
140 pick! {
144 if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
145 unsafe { Self { neon: vmulq_s8(self.neon, rhs.neon) } }
146 } else {
147 let self_array: [i8; 16] = cast(self);
148 let rhs_array: [i8; 16] = cast(rhs);
149
150 Self::new([
151 self_array[0].wrapping_mul(rhs_array[0]),
152 self_array[1].wrapping_mul(rhs_array[1]),
153 self_array[2].wrapping_mul(rhs_array[2]),
154 self_array[3].wrapping_mul(rhs_array[3]),
155 self_array[4].wrapping_mul(rhs_array[4]),
156 self_array[5].wrapping_mul(rhs_array[5]),
157 self_array[6].wrapping_mul(rhs_array[6]),
158 self_array[7].wrapping_mul(rhs_array[7]),
159 self_array[8].wrapping_mul(rhs_array[8]),
160 self_array[9].wrapping_mul(rhs_array[9]),
161 self_array[10].wrapping_mul(rhs_array[10]),
162 self_array[11].wrapping_mul(rhs_array[11]),
163 self_array[12].wrapping_mul(rhs_array[12]),
164 self_array[13].wrapping_mul(rhs_array[13]),
165 self_array[14].wrapping_mul(rhs_array[14]),
166 self_array[15].wrapping_mul(rhs_array[15]),
167 ])
168 }
169 }
170 }
171}
172
173integer_impl_div_rem!(
174 i8,
175 i8x16,
176 [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
177);
178
179impl Shl for i8x16 {
180 type Output = Self;
181
182 #[inline]
188 fn shl(self, rhs: Self) -> Self::Output {
189 pick! {
193 if #[cfg(all(target_feature="neon", target_arch="aarch64"))] {
194 unsafe {
195 let shift_by = vandq_s8(rhs.neon, vmovq_n_s8(7));
197 Self { neon: vshlq_s8(self.neon, shift_by) }
198 }
199 } else {
200 let self_array: [i8; 16] = cast(self);
201 let rhs_array: [i8; 16] = cast(rhs);
202
203 Self::new([
204 self_array[0].wrapping_shl(rhs_array[0] as u32),
205 self_array[1].wrapping_shl(rhs_array[1] as u32),
206 self_array[2].wrapping_shl(rhs_array[2] as u32),
207 self_array[3].wrapping_shl(rhs_array[3] as u32),
208 self_array[4].wrapping_shl(rhs_array[4] as u32),
209 self_array[5].wrapping_shl(rhs_array[5] as u32),
210 self_array[6].wrapping_shl(rhs_array[6] as u32),
211 self_array[7].wrapping_shl(rhs_array[7] as u32),
212 self_array[8].wrapping_shl(rhs_array[8] as u32),
213 self_array[9].wrapping_shl(rhs_array[9] as u32),
214 self_array[10].wrapping_shl(rhs_array[10] as u32),
215 self_array[11].wrapping_shl(rhs_array[11] as u32),
216 self_array[12].wrapping_shl(rhs_array[12] as u32),
217 self_array[13].wrapping_shl(rhs_array[13] as u32),
218 self_array[14].wrapping_shl(rhs_array[14] as u32),
219 self_array[15].wrapping_shl(rhs_array[15] as u32),
220 ])
221 }
222 }
223 }
224}
225
226impl Shr for i8x16 {
227 type Output = Self;
228
229 #[inline]
230 fn shr(self, rhs: Self) -> Self::Output {
231 pick! {
235 if #[cfg(all(target_feature="neon", target_arch="aarch64"))] {
236 unsafe {
237 let neg_rhs = vnegq_s8(vandq_s8(rhs.neon, vmovq_n_s8(7)));
240 Self { neon: vshlq_s8(self.neon, neg_rhs) }
241 }
242 } else {
243 let self_array: [i8; 16] = cast(self);
244 let rhs_array: [i8; 16] = cast(rhs);
245
246 Self::new([
247 self_array[0].wrapping_shr(rhs_array[0] as u32),
248 self_array[1].wrapping_shr(rhs_array[1] as u32),
249 self_array[2].wrapping_shr(rhs_array[2] as u32),
250 self_array[3].wrapping_shr(rhs_array[3] as u32),
251 self_array[4].wrapping_shr(rhs_array[4] as u32),
252 self_array[5].wrapping_shr(rhs_array[5] as u32),
253 self_array[6].wrapping_shr(rhs_array[6] as u32),
254 self_array[7].wrapping_shr(rhs_array[7] as u32),
255 self_array[8].wrapping_shr(rhs_array[8] as u32),
256 self_array[9].wrapping_shr(rhs_array[9] as u32),
257 self_array[10].wrapping_shr(rhs_array[10] as u32),
258 self_array[11].wrapping_shr(rhs_array[11] as u32),
259 self_array[12].wrapping_shr(rhs_array[12] as u32),
260 self_array[13].wrapping_shr(rhs_array[13] as u32),
261 self_array[14].wrapping_shr(rhs_array[14] as u32),
262 self_array[15].wrapping_shr(rhs_array[15] as u32),
263 ])
264 }
265 }
266 }
267}
268
269impl Add<i8> for i8x16 {
270 type Output = Self;
271 #[inline]
272 fn add(self, rhs: i8) -> Self::Output {
273 self.add(Self::splat(rhs))
274 }
275}
276
277impl Sub<i8> for i8x16 {
278 type Output = Self;
279 #[inline]
280 fn sub(self, rhs: i8) -> Self::Output {
281 self.sub(Self::splat(rhs))
282 }
283}
284
285impl Mul<i8> for i8x16 {
286 type Output = Self;
287
288 #[inline]
289 fn mul(self, rhs: i8) -> Self::Output {
290 self * Self::splat(rhs)
291 }
292}
293
294macro_rules! impl_shl_scalar {
295 ($Rhs:ident) => {
296 impl Shl<$Rhs> for i8x16 {
297 type Output = Self;
298
299 #[inline]
305 fn shl(self, rhs: $Rhs) -> Self::Output {
306 pick! {
310 if #[cfg(target_feature="simd128")] {
311 Self { simd: i8x16_shl(self.simd, rhs as u32 & 7) }
313 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
314 unsafe { Self { neon: vshlq_s8(self.neon, vmovq_n_s8(rhs as i8 & 7)) } }
316 } else {
317 let self_array = self.to_array();
318 let rhs = rhs as u32;
319
320 cast([
321 self_array[0].wrapping_shl(rhs),
322 self_array[1].wrapping_shl(rhs),
323 self_array[2].wrapping_shl(rhs),
324 self_array[3].wrapping_shl(rhs),
325 self_array[4].wrapping_shl(rhs),
326 self_array[5].wrapping_shl(rhs),
327 self_array[6].wrapping_shl(rhs),
328 self_array[7].wrapping_shl(rhs),
329 self_array[8].wrapping_shl(rhs),
330 self_array[9].wrapping_shl(rhs),
331 self_array[10].wrapping_shl(rhs),
332 self_array[11].wrapping_shl(rhs),
333 self_array[12].wrapping_shl(rhs),
334 self_array[13].wrapping_shl(rhs),
335 self_array[14].wrapping_shl(rhs),
336 self_array[15].wrapping_shl(rhs),
337 ])
338 }
339 }
340 }
341 }
342 };
343}
344impl_shl_scalar!(i8);
345impl_shl_scalar!(u8);
346impl_shl_scalar!(i16);
347impl_shl_scalar!(u16);
348impl_shl_scalar!(i32);
349impl_shl_scalar!(u32);
350impl_shl_scalar!(i64);
351impl_shl_scalar!(u64);
352impl_shl_scalar!(i128);
353impl_shl_scalar!(u128);
354
355macro_rules! impl_shr_scalar {
356 ($Rhs:ident) => {
357 impl Shr<$Rhs> for i8x16 {
358 type Output = Self;
359
360 #[inline]
366 fn shr(self, rhs: $Rhs) -> Self::Output {
367 pick! {
371 if #[cfg(target_feature="simd128")] {
372 Self { simd: i8x16_shr(self.simd, rhs as u32 & 7) }
374 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
375 unsafe { Self { neon: vshlq_s8(self.neon, vmovq_n_s8(-(rhs as i8 & 7))) } }
378 } else {
379 let self_array = self.to_array();
380 let rhs = rhs as u32;
381
382 cast([
383 self_array[0].wrapping_shr(rhs),
384 self_array[1].wrapping_shr(rhs),
385 self_array[2].wrapping_shr(rhs),
386 self_array[3].wrapping_shr(rhs),
387 self_array[4].wrapping_shr(rhs),
388 self_array[5].wrapping_shr(rhs),
389 self_array[6].wrapping_shr(rhs),
390 self_array[7].wrapping_shr(rhs),
391 self_array[8].wrapping_shr(rhs),
392 self_array[9].wrapping_shr(rhs),
393 self_array[10].wrapping_shr(rhs),
394 self_array[11].wrapping_shr(rhs),
395 self_array[12].wrapping_shr(rhs),
396 self_array[13].wrapping_shr(rhs),
397 self_array[14].wrapping_shr(rhs),
398 self_array[15].wrapping_shr(rhs),
399 ])
400 }
401 }
402 }
403 }
404 };
405}
406impl_shr_scalar!(i8);
407impl_shr_scalar!(u8);
408impl_shr_scalar!(i16);
409impl_shr_scalar!(u16);
410impl_shr_scalar!(i32);
411impl_shr_scalar!(u32);
412impl_shr_scalar!(i64);
413impl_shr_scalar!(u64);
414impl_shr_scalar!(i128);
415impl_shr_scalar!(u128);
416
417impl Add<i8x16> for i8 {
418 type Output = i8x16;
419 #[inline]
420 fn add(self, rhs: i8x16) -> Self::Output {
421 i8x16::splat(self).add(rhs)
422 }
423}
424
425impl Sub<i8x16> for i8 {
426 type Output = i8x16;
427 #[inline]
428 fn sub(self, rhs: i8x16) -> Self::Output {
429 i8x16::splat(self).sub(rhs)
430 }
431}
432
433impl Mul<i8x16> for i8 {
434 type Output = i8x16;
435
436 #[inline]
437 fn mul(self, rhs: i8x16) -> Self::Output {
438 i8x16::splat(self) * rhs
439 }
440}
441
442impl BitAnd for i8x16 {
443 type Output = Self;
444 #[inline]
445 fn bitand(self, rhs: Self) -> Self::Output {
446 pick! {
447 if #[cfg(target_feature="sse2")] {
448 Self { sse: bitand_m128i(self.sse, rhs.sse) }
449 } else if #[cfg(target_feature="simd128")] {
450 Self { simd: v128_and(self.simd, rhs.simd) }
451 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
452 unsafe {Self { neon: vandq_s8(self.neon, rhs.neon) }}
453 } else {
454 Self { arr: [
455 self.arr[0].bitand(rhs.arr[0]),
456 self.arr[1].bitand(rhs.arr[1]),
457 self.arr[2].bitand(rhs.arr[2]),
458 self.arr[3].bitand(rhs.arr[3]),
459 self.arr[4].bitand(rhs.arr[4]),
460 self.arr[5].bitand(rhs.arr[5]),
461 self.arr[6].bitand(rhs.arr[6]),
462 self.arr[7].bitand(rhs.arr[7]),
463 self.arr[8].bitand(rhs.arr[8]),
464 self.arr[9].bitand(rhs.arr[9]),
465 self.arr[10].bitand(rhs.arr[10]),
466 self.arr[11].bitand(rhs.arr[11]),
467 self.arr[12].bitand(rhs.arr[12]),
468 self.arr[13].bitand(rhs.arr[13]),
469 self.arr[14].bitand(rhs.arr[14]),
470 self.arr[15].bitand(rhs.arr[15]),
471 ]}
472 }
473 }
474 }
475}
476
477impl BitOr for i8x16 {
478 type Output = Self;
479 #[inline]
480 fn bitor(self, rhs: Self) -> Self::Output {
481 pick! {
482 if #[cfg(target_feature="sse2")] {
483 Self { sse: bitor_m128i(self.sse, rhs.sse) }
484 } else if #[cfg(target_feature="simd128")] {
485 Self { simd: v128_or(self.simd, rhs.simd) }
486 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
487 unsafe {Self { neon: vorrq_s8(self.neon, rhs.neon) }}
488 } else {
489 Self { arr: [
490 self.arr[0].bitor(rhs.arr[0]),
491 self.arr[1].bitor(rhs.arr[1]),
492 self.arr[2].bitor(rhs.arr[2]),
493 self.arr[3].bitor(rhs.arr[3]),
494 self.arr[4].bitor(rhs.arr[4]),
495 self.arr[5].bitor(rhs.arr[5]),
496 self.arr[6].bitor(rhs.arr[6]),
497 self.arr[7].bitor(rhs.arr[7]),
498 self.arr[8].bitor(rhs.arr[8]),
499 self.arr[9].bitor(rhs.arr[9]),
500 self.arr[10].bitor(rhs.arr[10]),
501 self.arr[11].bitor(rhs.arr[11]),
502 self.arr[12].bitor(rhs.arr[12]),
503 self.arr[13].bitor(rhs.arr[13]),
504 self.arr[14].bitor(rhs.arr[14]),
505 self.arr[15].bitor(rhs.arr[15]),
506 ]}
507 }
508 }
509 }
510}
511
512impl BitXor for i8x16 {
513 type Output = Self;
514 #[inline]
515 fn bitxor(self, rhs: Self) -> Self::Output {
516 pick! {
517 if #[cfg(target_feature="sse2")] {
518 Self { sse: bitxor_m128i(self.sse, rhs.sse) }
519 } else if #[cfg(target_feature="simd128")] {
520 Self { simd: v128_xor(self.simd, rhs.simd) }
521 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
522 unsafe {Self { neon: veorq_s8(self.neon, rhs.neon) }}
523 } else {
524 Self { arr: [
525 self.arr[0].bitxor(rhs.arr[0]),
526 self.arr[1].bitxor(rhs.arr[1]),
527 self.arr[2].bitxor(rhs.arr[2]),
528 self.arr[3].bitxor(rhs.arr[3]),
529 self.arr[4].bitxor(rhs.arr[4]),
530 self.arr[5].bitxor(rhs.arr[5]),
531 self.arr[6].bitxor(rhs.arr[6]),
532 self.arr[7].bitxor(rhs.arr[7]),
533 self.arr[8].bitxor(rhs.arr[8]),
534 self.arr[9].bitxor(rhs.arr[9]),
535 self.arr[10].bitxor(rhs.arr[10]),
536 self.arr[11].bitxor(rhs.arr[11]),
537 self.arr[12].bitxor(rhs.arr[12]),
538 self.arr[13].bitxor(rhs.arr[13]),
539 self.arr[14].bitxor(rhs.arr[14]),
540 self.arr[15].bitxor(rhs.arr[15]),
541 ]}
542 }
543 }
544 }
545}
546
547#[expect(deprecated)]
548impl CmpEq for i8x16 {
549 type Output = Self;
550 #[inline]
551 fn simd_eq(self, rhs: Self) -> Self::Output {
552 pick! {
553 if #[cfg(target_feature="sse2")] {
554 Self { sse: cmp_eq_mask_i8_m128i(self.sse, rhs.sse) }
555 } else if #[cfg(target_feature="simd128")] {
556 Self { simd: i8x16_eq(self.simd, rhs.simd) }
557 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
558 unsafe {Self { neon: vreinterpretq_s8_u8(vceqq_s8(self.neon, rhs.neon)) }}
559 } else {
560 Self { arr: [
561 if self.arr[0] == rhs.arr[0] { -1 } else { 0 },
562 if self.arr[1] == rhs.arr[1] { -1 } else { 0 },
563 if self.arr[2] == rhs.arr[2] { -1 } else { 0 },
564 if self.arr[3] == rhs.arr[3] { -1 } else { 0 },
565 if self.arr[4] == rhs.arr[4] { -1 } else { 0 },
566 if self.arr[5] == rhs.arr[5] { -1 } else { 0 },
567 if self.arr[6] == rhs.arr[6] { -1 } else { 0 },
568 if self.arr[7] == rhs.arr[7] { -1 } else { 0 },
569 if self.arr[8] == rhs.arr[8] { -1 } else { 0 },
570 if self.arr[9] == rhs.arr[9] { -1 } else { 0 },
571 if self.arr[10] == rhs.arr[10] { -1 } else { 0 },
572 if self.arr[11] == rhs.arr[11] { -1 } else { 0 },
573 if self.arr[12] == rhs.arr[12] { -1 } else { 0 },
574 if self.arr[13] == rhs.arr[13] { -1 } else { 0 },
575 if self.arr[14] == rhs.arr[14] { -1 } else { 0 },
576 if self.arr[15] == rhs.arr[15] { -1 } else { 0 },
577 ]}
578 }
579 }
580 }
581}
582
583#[expect(deprecated)]
584impl CmpGt for i8x16 {
585 type Output = Self;
586 #[inline]
587 fn simd_gt(self, rhs: Self) -> Self::Output {
588 pick! {
589 if #[cfg(target_feature="sse2")] {
590 Self { sse: cmp_gt_mask_i8_m128i(self.sse, rhs.sse) }
591 } else if #[cfg(target_feature="simd128")] {
592 Self { simd: i8x16_gt(self.simd, rhs.simd) }
593 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
594 unsafe {Self { neon: vreinterpretq_s8_u8(vcgtq_s8(self.neon, rhs.neon)) }}
595 } else {
596 Self { arr: [
597 if self.arr[0] > rhs.arr[0] { -1 } else { 0 },
598 if self.arr[1] > rhs.arr[1] { -1 } else { 0 },
599 if self.arr[2] > rhs.arr[2] { -1 } else { 0 },
600 if self.arr[3] > rhs.arr[3] { -1 } else { 0 },
601 if self.arr[4] > rhs.arr[4] { -1 } else { 0 },
602 if self.arr[5] > rhs.arr[5] { -1 } else { 0 },
603 if self.arr[6] > rhs.arr[6] { -1 } else { 0 },
604 if self.arr[7] > rhs.arr[7] { -1 } else { 0 },
605 if self.arr[8] > rhs.arr[8] { -1 } else { 0 },
606 if self.arr[9] > rhs.arr[9] { -1 } else { 0 },
607 if self.arr[10] > rhs.arr[10] { -1 } else { 0 },
608 if self.arr[11] > rhs.arr[11] { -1 } else { 0 },
609 if self.arr[12] > rhs.arr[12] { -1 } else { 0 },
610 if self.arr[13] > rhs.arr[13] { -1 } else { 0 },
611 if self.arr[14] > rhs.arr[14] { -1 } else { 0 },
612 if self.arr[15] > rhs.arr[15] { -1 } else { 0 },
613 ]}
614 }
615 }
616 }
617}
618
619#[expect(deprecated)]
620impl CmpLt for i8x16 {
621 type Output = Self;
622 #[inline]
623 fn simd_lt(self, rhs: Self) -> Self::Output {
624 pick! {
625 if #[cfg(target_feature="sse2")] {
626 Self { sse: cmp_lt_mask_i8_m128i(self.sse, rhs.sse) }
627 } else if #[cfg(target_feature="simd128")] {
628 Self { simd: i8x16_lt(self.simd, rhs.simd) }
629 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
630 unsafe {Self { neon: vreinterpretq_s8_u8(vcltq_s8(self.neon, rhs.neon)) }}
631 } else {
632 Self { arr: [
633 if self.arr[0] < rhs.arr[0] { -1 } else { 0 },
634 if self.arr[1] < rhs.arr[1] { -1 } else { 0 },
635 if self.arr[2] < rhs.arr[2] { -1 } else { 0 },
636 if self.arr[3] < rhs.arr[3] { -1 } else { 0 },
637 if self.arr[4] < rhs.arr[4] { -1 } else { 0 },
638 if self.arr[5] < rhs.arr[5] { -1 } else { 0 },
639 if self.arr[6] < rhs.arr[6] { -1 } else { 0 },
640 if self.arr[7] < rhs.arr[7] { -1 } else { 0 },
641 if self.arr[8] < rhs.arr[8] { -1 } else { 0 },
642 if self.arr[9] < rhs.arr[9] { -1 } else { 0 },
643 if self.arr[10] < rhs.arr[10] { -1 } else { 0 },
644 if self.arr[11] < rhs.arr[11] { -1 } else { 0 },
645 if self.arr[12] < rhs.arr[12] { -1 } else { 0 },
646 if self.arr[13] < rhs.arr[13] { -1 } else { 0 },
647 if self.arr[14] < rhs.arr[14] { -1 } else { 0 },
648 if self.arr[15] < rhs.arr[15] { -1 } else { 0 },
649 ]}
650 }
651 }
652 }
653}
654
655#[expect(deprecated)]
656impl CmpNe for i8x16 {
657 type Output = Self;
658 #[inline]
659 fn simd_ne(self, rhs: Self) -> Self::Output {
660 pick! {
661 if #[cfg(target_feature="sse2")] {
662 !self.simd_eq(rhs)
663 } else if #[cfg(target_feature="simd128")] {
664 Self { simd: i8x16_ne(self.simd, rhs.simd) }
665 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
666 !self.simd_eq(rhs)
667 } else {
668 Self { arr: [
669 if self.arr[0] != rhs.arr[0] { -1 } else { 0 },
670 if self.arr[1] != rhs.arr[1] { -1 } else { 0 },
671 if self.arr[2] != rhs.arr[2] { -1 } else { 0 },
672 if self.arr[3] != rhs.arr[3] { -1 } else { 0 },
673 if self.arr[4] != rhs.arr[4] { -1 } else { 0 },
674 if self.arr[5] != rhs.arr[5] { -1 } else { 0 },
675 if self.arr[6] != rhs.arr[6] { -1 } else { 0 },
676 if self.arr[7] != rhs.arr[7] { -1 } else { 0 },
677 if self.arr[8] != rhs.arr[8] { -1 } else { 0 },
678 if self.arr[9] != rhs.arr[9] { -1 } else { 0 },
679 if self.arr[10] != rhs.arr[10] { -1 } else { 0 },
680 if self.arr[11] != rhs.arr[11] { -1 } else { 0 },
681 if self.arr[12] != rhs.arr[12] { -1 } else { 0 },
682 if self.arr[13] != rhs.arr[13] { -1 } else { 0 },
683 if self.arr[14] != rhs.arr[14] { -1 } else { 0 },
684 if self.arr[15] != rhs.arr[15] { -1 } else { 0 },
685 ]}
686 }
687 }
688 }
689}
690
691#[expect(deprecated)]
692impl CmpLe for i8x16 {
693 type Output = Self;
694 #[inline]
695 fn simd_le(self, rhs: Self) -> Self::Output {
696 pick! {
697 if #[cfg(target_feature="sse2")] {
698 !self.simd_gt(rhs)
699 } else if #[cfg(target_feature="simd128")] {
700 Self { simd: i8x16_le(self.simd, rhs.simd) }
701 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
702 !self.simd_gt(rhs)
703 } else {
704 Self { arr: [
705 if self.arr[0] <= rhs.arr[0] { -1 } else { 0 },
706 if self.arr[1] <= rhs.arr[1] { -1 } else { 0 },
707 if self.arr[2] <= rhs.arr[2] { -1 } else { 0 },
708 if self.arr[3] <= rhs.arr[3] { -1 } else { 0 },
709 if self.arr[4] <= rhs.arr[4] { -1 } else { 0 },
710 if self.arr[5] <= rhs.arr[5] { -1 } else { 0 },
711 if self.arr[6] <= rhs.arr[6] { -1 } else { 0 },
712 if self.arr[7] <= rhs.arr[7] { -1 } else { 0 },
713 if self.arr[8] <= rhs.arr[8] { -1 } else { 0 },
714 if self.arr[9] <= rhs.arr[9] { -1 } else { 0 },
715 if self.arr[10] <= rhs.arr[10] { -1 } else { 0 },
716 if self.arr[11] <= rhs.arr[11] { -1 } else { 0 },
717 if self.arr[12] <= rhs.arr[12] { -1 } else { 0 },
718 if self.arr[13] <= rhs.arr[13] { -1 } else { 0 },
719 if self.arr[14] <= rhs.arr[14] { -1 } else { 0 },
720 if self.arr[15] <= rhs.arr[15] { -1 } else { 0 },
721 ]}
722 }
723 }
724 }
725}
726
727#[expect(deprecated)]
728impl CmpGe for i8x16 {
729 type Output = Self;
730 #[inline]
731 fn simd_ge(self, rhs: Self) -> Self::Output {
732 pick! {
733 if #[cfg(target_feature="sse2")] {
734 !self.simd_lt(rhs)
735 } else if #[cfg(target_feature="simd128")] {
736 Self { simd: i8x16_ge(self.simd, rhs.simd) }
737 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
738 !self.simd_lt(rhs)
739 } else {
740 Self { arr: [
741 if self.arr[0] >= rhs.arr[0] { -1 } else { 0 },
742 if self.arr[1] >= rhs.arr[1] { -1 } else { 0 },
743 if self.arr[2] >= rhs.arr[2] { -1 } else { 0 },
744 if self.arr[3] >= rhs.arr[3] { -1 } else { 0 },
745 if self.arr[4] >= rhs.arr[4] { -1 } else { 0 },
746 if self.arr[5] >= rhs.arr[5] { -1 } else { 0 },
747 if self.arr[6] >= rhs.arr[6] { -1 } else { 0 },
748 if self.arr[7] >= rhs.arr[7] { -1 } else { 0 },
749 if self.arr[8] >= rhs.arr[8] { -1 } else { 0 },
750 if self.arr[9] >= rhs.arr[9] { -1 } else { 0 },
751 if self.arr[10] >= rhs.arr[10] { -1 } else { 0 },
752 if self.arr[11] >= rhs.arr[11] { -1 } else { 0 },
753 if self.arr[12] >= rhs.arr[12] { -1 } else { 0 },
754 if self.arr[13] >= rhs.arr[13] { -1 } else { 0 },
755 if self.arr[14] >= rhs.arr[14] { -1 } else { 0 },
756 if self.arr[15] >= rhs.arr[15] { -1 } else { 0 },
757 ]}
758 }
759 }
760 }
761}
762
763impl i8x16 {
764 #[inline]
765 #[must_use]
766 pub const fn new(array: [i8; 16]) -> Self {
767 unsafe { core::mem::transmute(array) }
768 }
769
770 simd_comparison_fns!();
771
772 #[inline]
774 #[must_use]
775 pub fn from_i16x16_saturate(v: i16x16) -> i8x16 {
776 pick! {
777 if #[cfg(target_feature="avx2")] {
778 i8x16 { sse: pack_i16_to_i8_m128i( extract_m128i_from_m256i::<0>(v.avx2), extract_m128i_from_m256i::<1>(v.avx2)) }
779 } else if #[cfg(target_feature="sse2")] {
780 i8x16 { sse: pack_i16_to_i8_m128i( v.a.sse, v.b.sse ) }
781 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))] {
782 use core::arch::aarch64::*;
783
784 unsafe {
785 i8x16 { neon: vcombine_s8(vqmovn_s16(v.a.neon), vqmovn_s16(v.b.neon)) }
786 }
787 } else if #[cfg(target_feature="simd128")] {
788 use core::arch::wasm32::*;
789
790 i8x16 { simd: i8x16_narrow_i16x8(v.a.simd, v.b.simd) }
791 } else {
792 fn clamp(a : i16) -> i8 {
793 if a < i8::MIN as i16 {
794 i8::MIN
795 }
796 else if a > i8::MAX as i16 {
797 i8::MAX
798 } else {
799 a as i8
800 }
801 }
802
803 i8x16::new([
804 clamp(v.as_array()[0]),
805 clamp(v.as_array()[1]),
806 clamp(v.as_array()[2]),
807 clamp(v.as_array()[3]),
808 clamp(v.as_array()[4]),
809 clamp(v.as_array()[5]),
810 clamp(v.as_array()[6]),
811 clamp(v.as_array()[7]),
812 clamp(v.as_array()[8]),
813 clamp(v.as_array()[9]),
814 clamp(v.as_array()[10]),
815 clamp(v.as_array()[11]),
816 clamp(v.as_array()[12]),
817 clamp(v.as_array()[13]),
818 clamp(v.as_array()[14]),
819 clamp(v.as_array()[15]),
820 ])
821 }
822 }
823 }
824
825 #[inline]
827 #[must_use]
828 pub fn from_i16x16_truncate(v: i16x16) -> i8x16 {
829 pick! {
830 if #[cfg(target_feature="avx2")] {
831 let a = v.avx2.bitand(set_splat_i16_m256i(0xff));
832 i8x16 { sse: pack_i16_to_u8_m128i( extract_m128i_from_m256i::<0>(a), extract_m128i_from_m256i::<1>(a)) }
833 } else if #[cfg(target_feature="sse2")] {
834 let mask = set_splat_i16_m128i(0xff);
835 i8x16 { sse: pack_i16_to_u8_m128i( v.a.sse.bitand(mask), v.b.sse.bitand(mask) ) }
836 } else {
837 i8x16::new([
839 v.as_array()[0] as i8,
840 v.as_array()[1] as i8,
841 v.as_array()[2] as i8,
842 v.as_array()[3] as i8,
843 v.as_array()[4] as i8,
844 v.as_array()[5] as i8,
845 v.as_array()[6] as i8,
846 v.as_array()[7] as i8,
847 v.as_array()[8] as i8,
848 v.as_array()[9] as i8,
849 v.as_array()[10] as i8,
850 v.as_array()[11] as i8,
851 v.as_array()[12] as i8,
852 v.as_array()[13] as i8,
853 v.as_array()[14] as i8,
854 v.as_array()[15] as i8,
855 ])
856 }
857 }
858 }
859
860 #[inline]
861 #[must_use]
862 pub fn blend(self, t: Self, f: Self) -> Self {
863 pick! {
864 if #[cfg(target_feature="sse4.1")] {
865 Self { sse: blend_varying_i8_m128i(f.sse, t.sse, self.sse) }
866 } else if #[cfg(target_feature="simd128")] {
867 Self { simd: v128_bitselect(t.simd, f.simd, self.simd) }
868 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
869 unsafe {Self { neon: vbslq_s8(vreinterpretq_u8_s8(self.neon), t.neon, f.neon) }}
870 } else {
871 generic_bit_blend(self, t, f)
872 }
873 }
874 }
875
876 #[inline]
879 #[must_use]
880 pub fn is_positive(self) -> Self {
881 pick! {
882 if #[cfg(all(target_feature="neon", target_arch="aarch64"))] {
883 Self { neon: unsafe { vreinterpretq_s8_u8(vcgtzq_s8(self.neon)) } }
884 } else {
885 self.simd_gt(Self::ZERO)
886 }
887 }
888 }
889
890 #[inline]
893 #[must_use]
894 pub fn is_negative(self) -> Self {
895 pick! {
896 if #[cfg(all(target_feature="neon", target_arch="aarch64"))] {
897 Self { neon: unsafe { vreinterpretq_s8_u8(vcltzq_s8(self.neon)) } }
898 } else {
899 self.simd_lt(Self::ZERO)
900 }
901 }
902 }
903
904 #[inline]
905 #[must_use]
906 pub fn reduce_add(self) -> i8 {
907 #[allow(dead_code)]
908 const SHUFFLE_1: [i8; 16] =
909 [8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0];
910 #[allow(dead_code)]
911 const SHUFFLE_2: [i8; 16] =
912 [4, 5, 6, 7, 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0];
913 #[allow(dead_code)]
914 const SHUFFLE_3: [i8; 16] =
915 [2, 3, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
916 #[allow(dead_code)]
917 const SHUFFLE_4: [i8; 16] =
918 [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
919
920 pick! {
921 if #[cfg(target_feature="ssse3")] {
922 let rhs = shuffle_av_i8z_all_m128i(self.sse, m128i::from(SHUFFLE_1));
923 let sum = add_i8_m128i(self.sse, rhs);
924 let rhs = shuffle_av_i8z_all_m128i(sum, m128i::from(SHUFFLE_2));
925 let sum = add_i8_m128i(sum, rhs);
926 let rhs = shuffle_av_i8z_all_m128i(sum, m128i::from(SHUFFLE_3));
927 let sum = add_i8_m128i(sum, rhs);
928 let rhs = shuffle_av_i8z_all_m128i(sum, m128i::from(SHUFFLE_4));
929 let sum = add_i8_m128i(sum, rhs);
930 get_i32_from_m128i_s(sum) as i8
931 } else if #[cfg(target_feature="simd128")] {
932 let rhs = i8x16_shuffle::<8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7>(self.simd, self.simd);
933 let sum = i8x16_add(self.simd, rhs);
934 let rhs = i8x16_shuffle::<4, 5, 6, 7, 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0>(sum, sum);
935 let sum = i8x16_add(sum, rhs);
936 let rhs = i8x16_shuffle::<2, 3, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0>(sum, sum);
937 let sum = i8x16_add(sum, rhs);
938 let rhs = i8x16_shuffle::<1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0>(sum, sum);
939 let sum = i8x16_add(sum, rhs);
940 i8x16_extract_lane::<0>(sum)
941 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
942 unsafe {
943 let rhs = vqtbl1q_s8(self.neon, core::mem::transmute(SHUFFLE_1));
946 let sum = vaddq_s8(self.neon, rhs);
947 let rhs = vqtbl1q_s8(sum, core::mem::transmute(SHUFFLE_2));
948 let sum = vaddq_s8(sum, rhs);
949 let rhs = vqtbl1q_s8(sum, core::mem::transmute(SHUFFLE_3));
950 let sum = vaddq_s8(sum, rhs);
951 let rhs = vqtbl1q_s8(sum, core::mem::transmute(SHUFFLE_4));
952 let sum = vaddq_s8(sum, rhs);
953 vgetq_lane_s8(sum, 0)
954 }
955 } else {
956 let array: [i8; 16] = cast(self);
957 array.into_iter().reduce(i8::wrapping_add).unwrap()
958 }
959 }
960 }
961
962 #[inline]
963 #[must_use]
964 pub fn reduce_max(self) -> i8 {
965 #[allow(dead_code)]
966 const SHUFFLE_1: [i8; 16] =
967 [8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0];
968 #[allow(dead_code)]
969 const SHUFFLE_2: [i8; 16] =
970 [4, 5, 6, 7, 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0];
971 #[allow(dead_code)]
972 const SHUFFLE_3: [i8; 16] =
973 [2, 3, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
974 #[allow(dead_code)]
975 const SHUFFLE_4: [i8; 16] =
976 [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
977
978 pick! {
979 if #[cfg(all(target_feature="ssse3", target_feature="sse4.1"))] {
980 let rhs = shuffle_av_i8z_all_m128i(self.sse, m128i::from(SHUFFLE_1));
981 let max = max_i8_m128i(self.sse, rhs);
982 let rhs = shuffle_av_i8z_all_m128i(max, m128i::from(SHUFFLE_2));
983 let max = max_i8_m128i(max, rhs);
984 let rhs = shuffle_av_i8z_all_m128i(max, m128i::from(SHUFFLE_3));
985 let max = max_i8_m128i(max, rhs);
986 let rhs = shuffle_av_i8z_all_m128i(max, m128i::from(SHUFFLE_4));
987 let max = max_i8_m128i(max, rhs);
988 get_i32_from_m128i_s(max) as i8
989 } else if #[cfg(target_feature="simd128")] {
990 let rhs = i8x16_shuffle::<8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7>(self.simd, self.simd);
991 let max = i8x16_max(self.simd, rhs);
992 let rhs = i8x16_shuffle::<4, 5, 6, 7, 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0>(max, max);
993 let max = i8x16_max(max, rhs);
994 let rhs = i8x16_shuffle::<2, 3, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0>(max, max);
995 let max = i8x16_max(max, rhs);
996 let rhs = i8x16_shuffle::<1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0>(max, max);
997 let max = i8x16_max(max, rhs);
998 i8x16_extract_lane::<0>(max)
999 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
1000 unsafe {
1001 let rhs = vqtbl1q_s8(self.neon, core::mem::transmute(SHUFFLE_1));
1004 let max = vmaxq_s8(self.neon, rhs);
1005 let rhs = vqtbl1q_s8(max, core::mem::transmute(SHUFFLE_2));
1006 let max = vmaxq_s8(max, rhs);
1007 let rhs = vqtbl1q_s8(max, core::mem::transmute(SHUFFLE_3));
1008 let max = vmaxq_s8(max, rhs);
1009 let rhs = vqtbl1q_s8(max, core::mem::transmute(SHUFFLE_4));
1010 let max = vmaxq_s8(max, rhs);
1011 vgetq_lane_s8(max, 0)
1012 }
1013 } else {
1014 let array: [i8; 16] = cast(self);
1015 array.into_iter().reduce(i8::max).unwrap()
1016 }
1017 }
1018 }
1019
1020 #[inline]
1021 #[must_use]
1022 pub fn reduce_min(self) -> i8 {
1023 #[allow(dead_code)]
1024 const SHUFFLE_1: [i8; 16] =
1025 [8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0];
1026 #[allow(dead_code)]
1027 const SHUFFLE_2: [i8; 16] =
1028 [4, 5, 6, 7, 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0];
1029 #[allow(dead_code)]
1030 const SHUFFLE_3: [i8; 16] =
1031 [2, 3, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
1032 #[allow(dead_code)]
1033 const SHUFFLE_4: [i8; 16] =
1034 [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
1035
1036 pick! {
1037 if #[cfg(all(target_feature="ssse3", target_feature="sse4.1"))] {
1038 let rhs = shuffle_av_i8z_all_m128i(self.sse, m128i::from(SHUFFLE_1));
1039 let min = min_i8_m128i(self.sse, rhs);
1040 let rhs = shuffle_av_i8z_all_m128i(min, m128i::from(SHUFFLE_2));
1041 let min = min_i8_m128i(min, rhs);
1042 let rhs = shuffle_av_i8z_all_m128i(min, m128i::from(SHUFFLE_3));
1043 let min = min_i8_m128i(min, rhs);
1044 let rhs = shuffle_av_i8z_all_m128i(min, m128i::from(SHUFFLE_4));
1045 let min = min_i8_m128i(min, rhs);
1046 get_i32_from_m128i_s(min) as i8
1047 } else if #[cfg(target_feature="simd128")] {
1048 let rhs = i8x16_shuffle::<8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7>(self.simd, self.simd);
1049 let min = i8x16_min(self.simd, rhs);
1050 let rhs = i8x16_shuffle::<4, 5, 6, 7, 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0>(min, min);
1051 let min = i8x16_min(min, rhs);
1052 let rhs = i8x16_shuffle::<2, 3, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0>(min, min);
1053 let min = i8x16_min(min, rhs);
1054 let rhs = i8x16_shuffle::<1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0>(min, min);
1055 let min = i8x16_min(min, rhs);
1056 i8x16_extract_lane::<0>(min)
1057 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
1058 unsafe {
1059 let rhs = vqtbl1q_s8(self.neon, core::mem::transmute(SHUFFLE_1));
1062 let min = vminq_s8(self.neon, rhs);
1063 let rhs = vqtbl1q_s8(min, core::mem::transmute(SHUFFLE_2));
1064 let min = vminq_s8(min, rhs);
1065 let rhs = vqtbl1q_s8(min, core::mem::transmute(SHUFFLE_3));
1066 let min = vminq_s8(min, rhs);
1067 let rhs = vqtbl1q_s8(min, core::mem::transmute(SHUFFLE_4));
1068 let min = vminq_s8(min, rhs);
1069 vgetq_lane_s8(min, 0)
1070 }
1071 } else {
1072 let array: [i8; 16] = cast(self);
1073 array.into_iter().reduce(i8::min).unwrap()
1074 }
1075 }
1076 }
1077
1078 #[inline]
1079 #[must_use]
1080 pub fn abs(self) -> Self {
1081 pick! {
1082 if #[cfg(target_feature="ssse3")] {
1083 Self { sse: abs_i8_m128i(self.sse) }
1084 } else if #[cfg(target_feature="simd128")] {
1085 Self { simd: i8x16_abs(self.simd) }
1086 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
1087 unsafe {Self { neon: vabsq_s8(self.neon) }}
1088 } else {
1089 let arr: [i8; 16] = cast(self);
1090 cast([
1091 arr[0].wrapping_abs(),
1092 arr[1].wrapping_abs(),
1093 arr[2].wrapping_abs(),
1094 arr[3].wrapping_abs(),
1095 arr[4].wrapping_abs(),
1096 arr[5].wrapping_abs(),
1097 arr[6].wrapping_abs(),
1098 arr[7].wrapping_abs(),
1099 arr[8].wrapping_abs(),
1100 arr[9].wrapping_abs(),
1101 arr[10].wrapping_abs(),
1102 arr[11].wrapping_abs(),
1103 arr[12].wrapping_abs(),
1104 arr[13].wrapping_abs(),
1105 arr[14].wrapping_abs(),
1106 arr[15].wrapping_abs(),
1107 ])
1108 }
1109 }
1110 }
1111
1112 #[inline]
1113 #[must_use]
1114 pub fn unsigned_abs(self) -> u8x16 {
1115 pick! {
1116 if #[cfg(target_feature="ssse3")] {
1117 u8x16 { sse: abs_i8_m128i(self.sse) }
1118 } else if #[cfg(target_feature="simd128")] {
1119 u8x16 { simd: i8x16_abs(self.simd) }
1120 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
1121 unsafe { u8x16 { neon: vreinterpretq_u8_s8(vabsq_s8(self.neon)) }}
1122 } else {
1123 let arr: [i8; 16] = cast(self);
1124 cast(
1125 [
1126 arr[0].unsigned_abs(),
1127 arr[1].unsigned_abs(),
1128 arr[2].unsigned_abs(),
1129 arr[3].unsigned_abs(),
1130 arr[4].unsigned_abs(),
1131 arr[5].unsigned_abs(),
1132 arr[6].unsigned_abs(),
1133 arr[7].unsigned_abs(),
1134 arr[8].unsigned_abs(),
1135 arr[9].unsigned_abs(),
1136 arr[10].unsigned_abs(),
1137 arr[11].unsigned_abs(),
1138 arr[12].unsigned_abs(),
1139 arr[13].unsigned_abs(),
1140 arr[14].unsigned_abs(),
1141 arr[15].unsigned_abs(),
1142 ])
1143 }
1144 }
1145 }
1146
1147 signed_fn_signum!();
1148
1149 #[inline]
1150 #[must_use]
1151 pub fn max(self, rhs: Self) -> Self {
1152 pick! {
1153 if #[cfg(target_feature="sse4.1")] {
1154 Self { sse: max_i8_m128i(self.sse, rhs.sse) }
1155 } else if #[cfg(target_feature="simd128")] {
1156 Self { simd: i8x16_max(self.simd, rhs.simd) }
1157 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
1158 unsafe {Self { neon: vmaxq_s8(self.neon, rhs.neon) }}
1159 } else {
1160 self.simd_lt(rhs).blend(rhs, self)
1161 }
1162 }
1163 }
1164 #[inline]
1165 #[must_use]
1166 pub fn min(self, rhs: Self) -> Self {
1167 pick! {
1168 if #[cfg(target_feature="sse4.1")] {
1169 Self { sse: min_i8_m128i(self.sse, rhs.sse) }
1170 } else if #[cfg(target_feature="simd128")] {
1171 Self { simd: i8x16_min(self.simd, rhs.simd) }
1172 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
1173 unsafe {Self { neon: vminq_s8(self.neon, rhs.neon) }}
1174 } else {
1175 self.simd_lt(rhs).blend(self, rhs)
1176 }
1177 }
1178 }
1179
1180 integer_fn_clamp!();
1181
1182 #[inline]
1183 #[must_use]
1184 pub fn from_slice_unaligned(input: &[i8]) -> Self {
1185 assert!(input.len() >= 16);
1186
1187 pick! {
1188 if #[cfg(target_feature="sse2")] {
1189 unsafe { Self { sse: load_unaligned_m128i( &*(input.as_ptr() as * const [u8;16]) ) } }
1190 } else if #[cfg(target_feature="simd128")] {
1191 unsafe { Self { simd: v128_load(input.as_ptr() as *const v128 ) } }
1192 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
1193 unsafe { Self { neon: vld1q_s8( input.as_ptr() as *const i8 ) } }
1194 } else {
1195 unsafe { Self::new( *(input.as_ptr() as * const [i8;16]) ) }
1197 }
1198 }
1199 }
1200
1201 #[inline]
1202 #[must_use]
1203 #[doc(alias("movemask", "move_mask"))]
1204 pub fn to_bitmask(self) -> u32 {
1205 pick! {
1206 if #[cfg(target_feature="sse2")] {
1207 move_mask_i8_m128i(self.sse) as u32
1208 } else if #[cfg(target_feature="simd128")] {
1209 i8x16_bitmask(self.simd) as u32
1210 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
1211 unsafe
1212 {
1213 let masked = vcltq_s8(self.neon, vdupq_n_s8(0));
1215
1216 let selectbit : uint8x16_t = core::mem::transmute([1u8, 2, 4, 8, 16, 32, 64, 128, 1, 2, 4, 8, 16, 32, 64, 128]);
1218 let out = vandq_u8(masked, selectbit);
1219
1220 let table : uint8x16_t = core::mem::transmute([0u8, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15]);
1222 let r = vqtbl1q_u8(out, table);
1223
1224 vaddvq_u16(vreinterpretq_u16_u8(r)) as u32
1226 }
1227 } else {
1228 ((self.arr[0] < 0) as u32) << 0 |
1229 ((self.arr[1] < 0) as u32) << 1 |
1230 ((self.arr[2] < 0) as u32) << 2 |
1231 ((self.arr[3] < 0) as u32) << 3 |
1232 ((self.arr[4] < 0) as u32) << 4 |
1233 ((self.arr[5] < 0) as u32) << 5 |
1234 ((self.arr[6] < 0) as u32) << 6 |
1235 ((self.arr[7] < 0) as u32) << 7 |
1236 ((self.arr[8] < 0) as u32) << 8 |
1237 ((self.arr[9] < 0) as u32) << 9 |
1238 ((self.arr[10] < 0) as u32) << 10 |
1239 ((self.arr[11] < 0) as u32) << 11 |
1240 ((self.arr[12] < 0) as u32) << 12 |
1241 ((self.arr[13] < 0) as u32) << 13 |
1242 ((self.arr[14] < 0) as u32) << 14 |
1243 ((self.arr[15] < 0) as u32) << 15
1244 }
1245 }
1246 }
1247
1248 #[inline]
1249 #[must_use]
1250 pub fn any(self) -> bool {
1251 pick! {
1252 if #[cfg(target_feature="sse2")] {
1253 move_mask_i8_m128i(self.sse) != 0
1254 } else if #[cfg(target_feature="simd128")] {
1255 u8x16_bitmask(self.simd) != 0
1256 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))] {
1257 unsafe {
1258 vminvq_s8(self.neon) < 0
1259 }
1260 } else {
1261 let v : [u64;2] = cast(self);
1262 ((v[0] | v[1]) & 0x80808080808080) != 0
1263 }
1264 }
1265 }
1266 #[inline]
1267 #[must_use]
1268 pub fn all(self) -> bool {
1269 pick! {
1270 if #[cfg(target_feature="sse2")] {
1271 move_mask_i8_m128i(self.sse) == 0b1111_1111_1111_1111
1272 } else if #[cfg(target_feature="simd128")] {
1273 u8x16_bitmask(self.simd) == 0b1111_1111_1111_1111
1274 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))] {
1275 unsafe {
1276 vmaxvq_s8(self.neon) < 0
1277 }
1278 } else {
1279 let v : [u64;2] = cast(self);
1280 (v[0] & v[1] & 0x80808080808080) == 0x80808080808080
1281 }
1282 }
1283 }
1284
1285 #[inline]
1292 pub fn swizzle(self, rhs: i8x16) -> i8x16 {
1293 pick! {
1294 if #[cfg(target_feature="ssse3")] {
1295 Self { sse: shuffle_av_i8z_all_m128i(self.sse, add_saturating_u8_m128i(rhs.sse, set_splat_i8_m128i(0x70))) }
1296 } else if #[cfg(target_feature="simd128")] {
1297 Self { simd: i8x16_swizzle(self.simd, rhs.simd) }
1298 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))] {
1299 unsafe { Self { neon: vqtbl1q_s8(self.neon, vreinterpretq_u8_s8(rhs.neon)) } }
1300 } else {
1301 let idxs = rhs.to_array();
1302 let arr = self.to_array();
1303 let mut out = [0i8;16];
1304 for i in 0..16 {
1305 let idx = idxs[i] as usize;
1306 if idx >= 16 {
1307 out[i] = 0;
1308 } else {
1309 out[i] = arr[idx];
1310 }
1311 }
1312 Self::new(out)
1313 }
1314 }
1315 }
1316
1317 #[inline]
1326 pub fn swizzle_relaxed(self, rhs: i8x16) -> i8x16 {
1327 pick! {
1328 if #[cfg(target_feature="ssse3")] {
1329 Self { sse: shuffle_av_i8z_all_m128i(self.sse, rhs.sse) }
1330 } else if #[cfg(target_feature="simd128")] {
1331 Self { simd: i8x16_swizzle(self.simd, rhs.simd) }
1332 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))] {
1333 unsafe { Self { neon: vqtbl1q_s8(self.neon, vreinterpretq_u8_s8(rhs.neon)) } }
1334 } else {
1335 let idxs = rhs.to_array();
1336 let arr = self.to_array();
1337 let mut out = [0i8;16];
1338 for i in 0..16 {
1339 let idx = idxs[i] as usize;
1340 if idx >= 16 {
1341 out[i] = 0;
1342 } else {
1343 out[i] = arr[idx];
1344 }
1345 }
1346 Self::new(out)
1347 }
1348 }
1349 }
1350
1351 #[inline]
1352 #[must_use]
1353 pub fn none(self) -> bool {
1354 !self.any()
1355 }
1356
1357 #[inline]
1358 #[must_use]
1359 pub fn saturating_add(self, rhs: Self) -> Self {
1360 pick! {
1361 if #[cfg(target_feature="sse2")] {
1362 Self { sse: add_saturating_i8_m128i(self.sse, rhs.sse) }
1363 } else if #[cfg(target_feature="simd128")] {
1364 Self { simd: i8x16_add_sat(self.simd, rhs.simd) }
1365 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
1366 unsafe {Self { neon: vqaddq_s8(self.neon, rhs.neon) }}
1367 } else {
1368 Self { arr: [
1369 self.arr[0].saturating_add(rhs.arr[0]),
1370 self.arr[1].saturating_add(rhs.arr[1]),
1371 self.arr[2].saturating_add(rhs.arr[2]),
1372 self.arr[3].saturating_add(rhs.arr[3]),
1373 self.arr[4].saturating_add(rhs.arr[4]),
1374 self.arr[5].saturating_add(rhs.arr[5]),
1375 self.arr[6].saturating_add(rhs.arr[6]),
1376 self.arr[7].saturating_add(rhs.arr[7]),
1377 self.arr[8].saturating_add(rhs.arr[8]),
1378 self.arr[9].saturating_add(rhs.arr[9]),
1379 self.arr[10].saturating_add(rhs.arr[10]),
1380 self.arr[11].saturating_add(rhs.arr[11]),
1381 self.arr[12].saturating_add(rhs.arr[12]),
1382 self.arr[13].saturating_add(rhs.arr[13]),
1383 self.arr[14].saturating_add(rhs.arr[14]),
1384 self.arr[15].saturating_add(rhs.arr[15]),
1385 ]}
1386 }
1387 }
1388 }
1389 #[inline]
1390 #[must_use]
1391 pub fn saturating_sub(self, rhs: Self) -> Self {
1392 pick! {
1393 if #[cfg(target_feature="sse2")] {
1394 Self { sse: sub_saturating_i8_m128i(self.sse, rhs.sse) }
1395 } else if #[cfg(target_feature="simd128")] {
1396 Self { simd: i8x16_sub_sat(self.simd, rhs.simd) }
1397 } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
1398 unsafe { Self { neon: vqsubq_s8(self.neon, rhs.neon) } }
1399 } else {
1400 Self { arr: [
1401 self.arr[0].saturating_sub(rhs.arr[0]),
1402 self.arr[1].saturating_sub(rhs.arr[1]),
1403 self.arr[2].saturating_sub(rhs.arr[2]),
1404 self.arr[3].saturating_sub(rhs.arr[3]),
1405 self.arr[4].saturating_sub(rhs.arr[4]),
1406 self.arr[5].saturating_sub(rhs.arr[5]),
1407 self.arr[6].saturating_sub(rhs.arr[6]),
1408 self.arr[7].saturating_sub(rhs.arr[7]),
1409 self.arr[8].saturating_sub(rhs.arr[8]),
1410 self.arr[9].saturating_sub(rhs.arr[9]),
1411 self.arr[10].saturating_sub(rhs.arr[10]),
1412 self.arr[11].saturating_sub(rhs.arr[11]),
1413 self.arr[12].saturating_sub(rhs.arr[12]),
1414 self.arr[13].saturating_sub(rhs.arr[13]),
1415 self.arr[14].saturating_sub(rhs.arr[14]),
1416 self.arr[15].saturating_sub(rhs.arr[15]),
1417 ]}
1418 }
1419 }
1420 }
1421
1422 #[inline]
1424 #[must_use]
1425 pub fn saturating_mul(self, rhs: Self) -> Self {
1426 pick! {
1427 if #[cfg(all(target_feature="neon", target_arch="aarch64"))] {
1428 unsafe {
1429 let low_wide_mul = vreinterpretq_s8_s16(
1430 vmull_s8(vget_low_s8(self.neon), vget_low_s8(rhs.neon)),
1431 );
1432 let high_wide_mul = vreinterpretq_s8_s16(
1433 vmull_s8(vget_high_s8(self.neon), vget_high_s8(rhs.neon)),
1434 );
1435 let low_high = vuzpq_s8(low_wide_mul, high_wide_mul);
1436 let low = Self { neon: low_high.0 };
1437 let high = Self { neon: low_high.1 };
1438
1439 let no_overflow = high.simd_eq(low.is_negative());
1440 let limit = Self::MAX ^ (self ^ rhs).is_negative();
1441 no_overflow.blend(low, limit)
1442 }
1443 } else {
1444 let self_array = self.to_array();
1445 let rhs_array = rhs.to_array();
1446
1447 Self::new([
1448 self_array[0].saturating_mul(rhs_array[0]),
1449 self_array[1].saturating_mul(rhs_array[1]),
1450 self_array[2].saturating_mul(rhs_array[2]),
1451 self_array[3].saturating_mul(rhs_array[3]),
1452 self_array[4].saturating_mul(rhs_array[4]),
1453 self_array[5].saturating_mul(rhs_array[5]),
1454 self_array[6].saturating_mul(rhs_array[6]),
1455 self_array[7].saturating_mul(rhs_array[7]),
1456 self_array[8].saturating_mul(rhs_array[8]),
1457 self_array[9].saturating_mul(rhs_array[9]),
1458 self_array[10].saturating_mul(rhs_array[10]),
1459 self_array[11].saturating_mul(rhs_array[11]),
1460 self_array[12].saturating_mul(rhs_array[12]),
1461 self_array[13].saturating_mul(rhs_array[13]),
1462 self_array[14].saturating_mul(rhs_array[14]),
1463 self_array[15].saturating_mul(rhs_array[15]),
1464 ])
1465 }
1466 }
1467 }
1468
1469 integer_fn_saturating_div!([
1470 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
1471 ]);
1472
1473 #[must_use]
1475 #[inline]
1476 pub fn transpose(data: [i8x16; 16]) -> [i8x16; 16] {
1477 #[inline(always)]
1480 fn transpose_column(data: &[i8x16; 16], index: usize) -> i8x16 {
1481 i8x16::new([
1482 data[0].as_array()[index],
1483 data[1].as_array()[index],
1484 data[2].as_array()[index],
1485 data[3].as_array()[index],
1486 data[4].as_array()[index],
1487 data[5].as_array()[index],
1488 data[6].as_array()[index],
1489 data[7].as_array()[index],
1490 data[8].as_array()[index],
1491 data[9].as_array()[index],
1492 data[10].as_array()[index],
1493 data[11].as_array()[index],
1494 data[12].as_array()[index],
1495 data[13].as_array()[index],
1496 data[14].as_array()[index],
1497 data[15].as_array()[index],
1498 ])
1499 }
1500
1501 [
1502 transpose_column(&data, 0),
1503 transpose_column(&data, 1),
1504 transpose_column(&data, 2),
1505 transpose_column(&data, 3),
1506 transpose_column(&data, 4),
1507 transpose_column(&data, 5),
1508 transpose_column(&data, 6),
1509 transpose_column(&data, 7),
1510 transpose_column(&data, 8),
1511 transpose_column(&data, 9),
1512 transpose_column(&data, 10),
1513 transpose_column(&data, 11),
1514 transpose_column(&data, 12),
1515 transpose_column(&data, 13),
1516 transpose_column(&data, 14),
1517 transpose_column(&data, 15),
1518 ]
1519 }
1520
1521 #[inline]
1522 pub fn to_array(self) -> [i8; 16] {
1523 cast(self)
1524 }
1525
1526 #[inline]
1527 pub fn as_array(&self) -> &[i8; 16] {
1528 cast_ref(self)
1529 }
1530
1531 #[inline]
1532 pub fn as_mut_array(&mut self) -> &mut [i8; 16] {
1533 cast_mut(self)
1534 }
1535}