wide/i64x2_.rs

use super::*;

pick! {
  if #[cfg(target_feature="sse2")] {
    #[derive(Default, Clone, Copy, PartialEq, Eq)]
    #[repr(C, align(16))]
    pub struct i64x2 { pub(crate) sse: m128i }
  } else if #[cfg(target_feature="simd128")] {
    use core::arch::wasm32::*;

    #[derive(Clone, Copy)]
    #[repr(transparent)]
    pub struct i64x2 { pub(crate) simd: v128 }

    impl Default for i64x2 {
      fn default() -> Self {
        Self::splat(0)
      }
    }

    impl PartialEq for i64x2 {
      fn eq(&self, other: &Self) -> bool {
        u64x2_all_true(i64x2_eq(self.simd, other.simd))
      }
    }

    impl Eq for i64x2 { }
  } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
    use core::arch::aarch64::*;
    #[repr(C)]
    #[derive(Copy, Clone)]
    pub struct i64x2 { pub(crate) neon: int64x2_t }

    impl Default for i64x2 {
      #[inline]
      fn default() -> Self {
        unsafe { Self { neon: vdupq_n_s64(0) } }
      }
    }

    impl PartialEq for i64x2 {
      #[inline]
      fn eq(&self, other: &Self) -> bool {
        unsafe {
          vgetq_lane_s64(self.neon, 0) == vgetq_lane_s64(other.neon, 0)
            && vgetq_lane_s64(self.neon, 1) == vgetq_lane_s64(other.neon, 1)
        }
      }
    }

    impl Eq for i64x2 { }
  } else {
    #[derive(Default, Clone, Copy, PartialEq, Eq)]
    #[repr(C, align(16))]
    pub struct i64x2 { arr: [i64;2] }
  }
}

int_uint_consts!(i64, 2, i64x2, 128);

unsafe impl Zeroable for i64x2 {}
unsafe impl Pod for i64x2 {}

impl AlignTo for i64x2 {
  type Elem = i64;
}

impl Add for i64x2 {
  type Output = Self;
  #[inline]
  fn add(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: add_i64_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i64x2_add(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vaddq_s64(self.neon, rhs.neon) } }
      } else {
        Self { arr: [
          self.arr[0].wrapping_add(rhs.arr[0]),
          self.arr[1].wrapping_add(rhs.arr[1]),
        ]}
      }
    }
  }
}

impl Sub for i64x2 {
  type Output = Self;
  #[inline]
  fn sub(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: sub_i64_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i64x2_sub(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe { Self { neon: vsubq_s64(self.neon, rhs.neon) } }
      } else {
        Self { arr: [
          self.arr[0].wrapping_sub(rhs.arr[0]),
          self.arr[1].wrapping_sub(rhs.arr[1]),
        ]}
      }
    }
  }
}

// TODO: consider an SSE2 implementation built from 32-bit multiplies; x86 has
// no packed 64-bit multiply before AVX-512, so outside of simd128 this falls
// back to scalar per-lane multiplication.
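/// Lane-wise wrapping multiplication.
///
/// Illustrative example (assuming use through the `wide` crate):
/// ```
/// use wide::i64x2;
/// let a = i64x2::new([3, i64::MAX]);
/// let b = i64x2::new([4, 2]);
/// // the second lane wraps, matching `i64::wrapping_mul`
/// assert_eq!((a * b).to_array(), [12, -2]);
/// ```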
impl Mul for i64x2 {
  type Output = Self;
  #[inline]
  fn mul(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="simd128")] {
        Self { simd: i64x2_mul(self.simd, rhs.simd) }
      } else {
        let arr1: [i64; 2] = cast(self);
        let arr2: [i64; 2] = cast(rhs);
        cast([
          arr1[0].wrapping_mul(arr2[0]),
          arr1[1].wrapping_mul(arr2[1]),
        ])
      }
    }
  }
}

impl Add<i64> for i64x2 {
  type Output = Self;
  #[inline]
  fn add(self, rhs: i64) -> Self::Output {
    self.add(Self::splat(rhs))
  }
}

impl Sub<i64> for i64x2 {
  type Output = Self;
  #[inline]
  fn sub(self, rhs: i64) -> Self::Output {
    self.sub(Self::splat(rhs))
  }
}

impl Mul<i64> for i64x2 {
  type Output = Self;
  #[inline]
  fn mul(self, rhs: i64) -> Self::Output {
    self.mul(Self::splat(rhs))
  }
}

impl Add<i64x2> for i64 {
  type Output = i64x2;
  #[inline]
  fn add(self, rhs: i64x2) -> Self::Output {
    i64x2::splat(self).add(rhs)
  }
}

impl Sub<i64x2> for i64 {
  type Output = i64x2;
  #[inline]
  fn sub(self, rhs: i64x2) -> Self::Output {
    i64x2::splat(self).sub(rhs)
  }
}

impl Mul<i64x2> for i64 {
  type Output = i64x2;
  #[inline]
  fn mul(self, rhs: i64x2) -> Self::Output {
    i64x2::splat(self).mul(rhs)
  }
}

impl BitAnd for i64x2 {
  type Output = Self;
  #[inline]
  fn bitand(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: bitand_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: v128_and(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: vandq_s64(self.neon, rhs.neon) }}
      } else {
        Self { arr: [
          self.arr[0].bitand(rhs.arr[0]),
          self.arr[1].bitand(rhs.arr[1]),
        ]}
      }
    }
  }
}

impl BitOr for i64x2 {
  type Output = Self;
  #[inline]
  fn bitor(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: bitor_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: v128_or(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: vorrq_s64(self.neon, rhs.neon) }}
      } else {
        Self { arr: [
          self.arr[0].bitor(rhs.arr[0]),
          self.arr[1].bitor(rhs.arr[1]),
        ]}
      }
    }
  }
}

impl BitXor for i64x2 {
  type Output = Self;
  #[inline]
  fn bitxor(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse2")] {
        Self { sse: bitxor_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: v128_xor(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: veorq_s64(self.neon, rhs.neon) }}
      } else {
        Self { arr: [
          self.arr[0].bitxor(rhs.arr[0]),
          self.arr[1].bitxor(rhs.arr[1]),
        ]}
      }
    }
  }
}

/// Shifts lanes by the corresponding lane.
///
/// Bitwise shift-left; yields `self << mask(rhs)`, where mask removes any
/// high-order bits of `rhs` that would cause the shift to exceed the bitwidth
/// of the type. (same as `wrapping_shl`)
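///
/// Illustrative example (assuming use through the `wide` crate):
/// ```
/// use wide::i64x2;
/// let a = i64x2::new([1, 1]);
/// let by = i64x2::new([3, 65]); // 65 is masked to 1
/// assert_eq!((a << by).to_array(), [8, 2]);
/// ```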
impl Shl for i64x2 {
  type Output = Self;

  #[inline]
  fn shl(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        // mask the shift count to 63 to have same behavior on all platforms
        let shift_by = rhs & Self::splat(63);
        Self { sse: shl_each_u64_m128i(self.sse, shift_by.sse) }
      } else if #[cfg(all(target_feature="neon", target_arch="aarch64"))] {
        unsafe {
          // mask the shift count to 63 to have same behavior on all platforms
          let shift_by = vandq_s64(rhs.neon, vmovq_n_s64(63));
          Self { neon: vshlq_s64(self.neon, shift_by) }
        }
      } else {
        let arr: [i64; 2] = cast(self);
        let rhs: [i64; 2] = cast(rhs);
        cast([
          arr[0].wrapping_shl(rhs[0] as u32),
          arr[1].wrapping_shl(rhs[1] as u32),
        ])
      }
    }
  }
}

macro_rules! impl_shl_t_for_i64x2 {
  ($($shift_type:ty),+ $(,)?) => {
    $(impl Shl<$shift_type> for i64x2 {
      type Output = Self;
      /// Shifts all lanes by the value given.
      #[inline]
      fn shl(self, rhs: $shift_type) -> Self::Output {
        pick! {
          if #[cfg(target_feature="sse2")] {
            let shift = cast([rhs as u64, 0]);
            Self { sse: shl_all_u64_m128i(self.sse, shift) }
          } else if #[cfg(target_feature="simd128")] {
            Self { simd: i64x2_shl(self.simd, rhs as u32) }
          } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
            unsafe {Self { neon: vshlq_s64(self.neon, vmovq_n_s64(rhs as i64)) }}
          } else {
            let u = rhs as u32;
            Self { arr: [
              self.arr[0].wrapping_shl(u),
              self.arr[1].wrapping_shl(u),
            ]}
          }
        }
      }
    })+
  };
}
impl_shl_t_for_i64x2!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);

/// Shifts lanes by the corresponding lane.
///
/// Bitwise shift-right; yields `self >> mask(rhs)`, where mask removes any
/// high-order bits of `rhs` that would cause the shift to exceed the bitwidth
/// of the type. (same as `wrapping_shr`)
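///
/// Illustrative example (assuming use through the `wide` crate):
/// ```
/// use wide::i64x2;
/// let a = i64x2::new([-8, 8]);
/// let by = i64x2::new([1, 2]);
/// // arithmetic shift: the sign bit is preserved
/// assert_eq!((a >> by).to_array(), [-4, 2]);
/// ```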
impl Shr for i64x2 {
  type Output = Self;

  #[inline]
  fn shr(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(all(target_feature="neon", target_arch="aarch64"))] {
        unsafe {
          // mask the shift count to 63 to have same behavior on all platforms
          // no right shift, have to pass negative value to left shift on neon
          let shift_by = vnegq_s64(vandq_s64(rhs.neon, vmovq_n_s64(63)));
          Self { neon: vshlq_s64(self.neon, shift_by) }
        }
      } else {
        let arr: [i64; 2] = cast(self);
        let rhs: [i64; 2] = cast(rhs);
        cast([
          arr[0].wrapping_shr(rhs[0] as u32),
          arr[1].wrapping_shr(rhs[1] as u32),
        ])
      }
    }
  }
}

macro_rules! impl_shr_t_for_i64x2 {
  ($($shift_type:ty),+ $(,)?) => {
    $(impl Shr<$shift_type> for i64x2 {
      type Output = Self;
      /// Shifts all lanes by the value given.
      #[inline]
      fn shr(self, rhs: $shift_type) -> Self::Output {
        pick! {
          if #[cfg(target_feature="simd128")] {
            Self { simd: i64x2_shr(self.simd, rhs as u32) }
          } else {
            let u = rhs as u32;
            let arr: [i64; 2] = cast(self);
            cast([
              arr[0].wrapping_shr(u),
              arr[1].wrapping_shr(u),
            ])
          }
        }
      }
    })+
  };
}

impl_shr_t_for_i64x2!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);

impl CmpEq for i64x2 {
  type Output = Self;
  #[inline]
  fn simd_eq(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse4.1")] {
        Self { sse: cmp_eq_mask_i64_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i64x2_eq(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: vreinterpretq_s64_u64(vceqq_s64(self.neon, rhs.neon)) }}
      } else {
        let s: [i64;2] = cast(self);
        let r: [i64;2] = cast(rhs);
        cast([
          if s[0] == r[0] { -1_i64 } else { 0 },
          if s[1] == r[1] { -1_i64 } else { 0 },
        ])
      }
    }
  }
}

impl CmpGt for i64x2 {
  type Output = Self;
  #[inline]
  fn simd_gt(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse4.2")] {
        Self { sse: cmp_gt_mask_i64_m128i(self.sse, rhs.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i64x2_gt(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: vreinterpretq_s64_u64(vcgtq_s64(self.neon, rhs.neon)) }}
      } else {
        let s: [i64;2] = cast(self);
        let r: [i64;2] = cast(rhs);
        cast([
          if s[0] > r[0] { -1_i64 } else { 0 },
          if s[1] > r[1] { -1_i64 } else { 0 },
        ])
      }
    }
  }
}

impl CmpLt for i64x2 {
  type Output = Self;
  #[inline]
  fn simd_lt(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="sse4.2")] {
        // only has gt, so flip arguments around to get lt
        Self { sse: cmp_gt_mask_i64_m128i(rhs.sse, self.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: i64x2_lt(self.simd, rhs.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: vreinterpretq_s64_u64(vcltq_s64(self.neon, rhs.neon)) }}
      } else {
        let s: [i64;2] = cast(self);
        let r: [i64;2] = cast(rhs);
        cast([
          if s[0] < r[0] { -1_i64 } else { 0 },
          if s[1] < r[1] { -1_i64 } else { 0 },
        ])
      }
    }
  }
}

impl i64x2 {
  #[inline]
  #[must_use]
  pub const fn new(array: [i64; 2]) -> Self {
    unsafe { core::mem::transmute(array) }
  }
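
  /// Lane-wise blend: for each lane where the mask (`self`) has all bits set
  /// the result takes the lane from `t`, otherwise from `f`. Mask lanes
  /// should be all ones or all zeros, as produced by the comparison ops.
  ///
  /// Illustrative example (assuming use through the `wide` crate):
  /// ```
  /// use wide::i64x2;
  /// let mask = i64x2::new([-1, 0]);
  /// let t = i64x2::new([1, 2]);
  /// let f = i64x2::new([3, 4]);
  /// assert_eq!(mask.blend(t, f).to_array(), [1, 4]);
  /// ```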
  #[inline]
  #[must_use]
  pub fn blend(self, t: Self, f: Self) -> Self {
    pick! {
      if #[cfg(target_feature="sse4.1")] {
        Self { sse: blend_varying_i8_m128i(f.sse, t.sse, self.sse) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd: v128_bitselect(t.simd, f.simd, self.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: vbslq_s64(vreinterpretq_u64_s64(self.neon), t.neon, f.neon) }}
      } else {
        generic_bit_blend(self, t, f)
      }
    }
  }

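  /// Lane-wise wrapping absolute value (`i64::MIN` stays `i64::MIN`).
  ///
  /// Illustrative example (assuming use through the `wide` crate):
  /// ```
  /// use wide::i64x2;
  /// assert_eq!(i64x2::new([-3, 7]).abs().to_array(), [3, 7]);
  /// ```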
  #[inline]
  #[must_use]
  pub fn abs(self) -> Self {
    pick! {
      // x86 doesn't have this builtin
      if #[cfg(target_feature="simd128")] {
        Self { simd: i64x2_abs(self.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {Self { neon: vabsq_s64(self.neon) }}
      } else {
        let arr: [i64; 2] = cast(self);
        cast([
          arr[0].wrapping_abs(),
          arr[1].wrapping_abs(),
        ])
      }
    }
  }

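  /// Lane-wise absolute value returned as `u64x2`, so `i64::MIN` becomes
  /// `2^63` instead of wrapping.
  ///
  /// Illustrative example (assuming use through the `wide` crate):
  /// ```
  /// use wide::i64x2;
  /// let v = i64x2::new([i64::MIN, -7]);
  /// assert_eq!(v.unsigned_abs().to_array(), [1_u64 << 63, 7]);
  /// ```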
  #[inline]
  #[must_use]
  pub fn unsigned_abs(self) -> u64x2 {
    pick! {
      // x86 doesn't have this builtin
      if #[cfg(target_feature="simd128")] {
        u64x2 { simd: i64x2_abs(self.simd) }
      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
        unsafe {u64x2 { neon: vreinterpretq_u64_s64(vabsq_s64(self.neon)) }}
      } else {
        let arr: [i64; 2] = cast(self);
        cast([
          arr[0].unsigned_abs(),
          arr[1].unsigned_abs(),
        ])
      }
    }
  }

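  /// Converts each lane to `f64`, returning an `f64x2`.
  ///
  /// Illustrative example (assuming use through the `wide` crate):
  /// ```
  /// use wide::i64x2;
  /// assert_eq!(i64x2::new([2, -3]).round_float().to_array(), [2.0, -3.0]);
  /// ```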
  #[inline]
  #[must_use]
  pub fn round_float(self) -> f64x2 {
    let arr: [i64; 2] = cast(self);
    cast([arr[0] as f64, arr[1] as f64])
  }

  /// returns a bit mask built from the high bit of each lane, with lane 0 in
  /// bit 0 and lane 1 in bit 1
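  ///
  /// Illustrative example (assuming use through the `wide` crate):
  /// ```
  /// use wide::i64x2;
  /// // lane 0 is negative (high bit set), lane 1 is not
  /// assert_eq!(i64x2::new([-1, 1]).to_bitmask(), 0b01);
  /// ```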
  #[inline]
  #[must_use]
  pub fn to_bitmask(self) -> u32 {
    pick! {
      if #[cfg(target_feature="sse2")] {
        // use f64 move_mask since it is the same size as i64
        move_mask_m128d(cast(self.sse)) as u32
      } else if #[cfg(target_feature="simd128")] {
        i64x2_bitmask(self.simd) as u32
      } else {
        // nothing amazingly efficient for neon
        let arr: [u64; 2] = cast(self);
        (arr[0] >> 63 | ((arr[1] >> 62) & 2)) as u32
      }
    }
  }

  /// true if the high bit is set in any lane of the vector
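  ///
  /// Illustrative example (assuming use through the `wide` crate):
  /// ```
  /// use wide::i64x2;
  /// assert!(i64x2::new([-1, 0]).any());
  /// assert!(!i64x2::new([1, 2]).any());
  /// ```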
  #[inline]
  #[must_use]
  pub fn any(self) -> bool {
    pick! {
      if #[cfg(target_feature="sse2")] {
        // use f64 move_mask since it is the same size as i64
        move_mask_m128d(cast(self.sse)) != 0
      } else if #[cfg(target_feature="simd128")] {
        i64x2_bitmask(self.simd) != 0
      } else {
        let v: [u64; 2] = cast(self);
        ((v[0] | v[1]) & 0x8000000000000000) != 0
      }
    }
  }

  /// true if the high bit is set in every lane of the vector
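  ///
  /// Illustrative example (assuming use through the `wide` crate):
  /// ```
  /// use wide::i64x2;
  /// assert!(i64x2::new([-1, -2]).all());
  /// assert!(!i64x2::new([-1, 0]).all());
  /// ```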
  #[inline]
  #[must_use]
  pub fn all(self) -> bool {
    pick! {
      if #[cfg(target_feature="sse2")] {
        // use f64 move_mask since it is the same size as i64
        move_mask_m128d(cast(self.sse)) == 0b11
      } else if #[cfg(target_feature="simd128")] {
        i64x2_bitmask(self.simd) == 0b11
      } else {
        let v: [u64; 2] = cast(self);
        ((v[0] & v[1]) & 0x8000000000000000) == 0x8000000000000000
      }
    }
  }

  /// true if the high bit is not set in any lane of the vector
  #[inline]
  #[must_use]
  pub fn none(self) -> bool {
    !self.any()
  }

  #[inline]
  pub fn to_array(self) -> [i64; 2] {
    cast(self)
  }

  #[inline]
  pub fn as_array(&self) -> &[i64; 2] {
    cast_ref(self)
  }

  #[inline]
  pub fn as_mut_array(&mut self) -> &mut [i64; 2] {
    cast_mut(self)
  }

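  /// Lane-wise minimum.
  ///
  /// Illustrative example (assuming use through the `wide` crate):
  /// ```
  /// use wide::i64x2;
  /// let a = i64x2::new([1, 5]);
  /// let b = i64x2::new([3, 2]);
  /// assert_eq!(a.min(b).to_array(), [1, 2]);
  /// ```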
  #[inline]
  #[must_use]
  pub fn min(self, rhs: Self) -> Self {
    self.simd_lt(rhs).blend(self, rhs)
  }

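  /// Lane-wise maximum.
  ///
  /// Illustrative example (assuming use through the `wide` crate):
  /// ```
  /// use wide::i64x2;
  /// let a = i64x2::new([1, 5]);
  /// let b = i64x2::new([3, 2]);
  /// assert_eq!(a.max(b).to_array(), [3, 5]);
  /// ```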
  #[inline]
  #[must_use]
  pub fn max(self, rhs: Self) -> Self {
    self.simd_gt(rhs).blend(self, rhs)
  }
}