rten_simd/
ops.rs

//! Traits for operations on SIMD vectors.
//!
//! The entry point is the [`Isa`] trait, an implementation of which is passed
//! to SIMD operations when evaluated. This has methods for each of the
//! supported element types which return the implementation of operations on
//! SIMD vectors with that element type.
//!
//! The [`NumOps`] trait provides operations available on all element types. The
//! sub-traits [`FloatOps`], [`IntOps`] and [`SignedIntOps`] provide additional
//! operations on float, integer and signed integer element types. Additionally
//! there are traits for individual operations such as [`Extend`] or
//! [`NarrowSaturate`] which are available on a subset of element types.

13
14use std::mem::MaybeUninit;
15
16use crate::elem::Elem;
17use crate::simd::{Mask, Simd};
18
19/// Entry point for performing SIMD operations using a particular Instruction
20/// Set Architecture (ISA).
21///
22/// Implementations of this trait are types which can only be instantiated
23/// if the instruction set is available. They are usually zero-sized and thus
24/// free to copy.
25///
26/// # Safety
27///
28/// Implementations must ensure they can only be constructed if the
29/// instruction set is supported on the current system.
30pub unsafe trait Isa: Copy {
31    /// SIMD vector with an unspecified element type. This is used for
32    /// bitwise casting between different vector types.
33    type Bits: Simd;
34
35    /// Mask vector for 32-bit lanes.
36    type M32: Mask;
37
38    /// Mask vector for 16-bit lanes.
39    type M16: Mask;
40
41    /// Mask vector for 8-bit lanes.
42    type M8: Mask;
43
44    /// SIMD vector with `f32` elements.
45    type F32: Simd<Elem = f32, Isa = Self, Mask = Self::M32>;
46
47    /// SIMD vector with `i32` elements.
48    type I32: Simd<Elem = i32, Isa = Self, Mask = Self::M32>;
49
50    /// SIMD vector with `i16` elements.
51    type I16: Simd<Elem = i16, Isa = Self, Mask = Self::M16>;
52
53    /// SIMD vector with `i8` elements.
54    type I8: Simd<Elem = i8, Isa = Self, Mask = Self::M8>;
55
56    /// SIMD vector with `u8` elements.
57    type U8: Simd<Elem = u8, Isa = Self, Mask = Self::M8>;
58
59    /// SIMD vector with `u16` elements.
60    type U16: Simd<Elem = u16, Isa = Self, Mask = Self::M16>;
61
62    /// SIMD vector with `u32` elements.
63    type U32: Simd<Elem = u32, Isa = Self, Mask = Self::M32>;
64
65    /// Operations on SIMD vectors with `f32` elements.
66    fn f32(self) -> impl FloatOps<f32, Simd = Self::F32, Int = Self::I32>;
67
68    /// Operations on SIMD vectors with `i32` elements.
69    fn i32(
70        self,
71    ) -> impl SignedIntOps<i32, Simd = Self::I32>
72    + NarrowSaturate<i32, i16, Output = Self::I16>
73    + Concat<i32>
74    + ToFloat<i32, Output = Self::F32>;
75
76    /// Operations on SIMD vectors with `i16` elements.
77    fn i16(
78        self,
79    ) -> impl SignedIntOps<i16, Simd = Self::I16>
80    + NarrowSaturate<i16, u8, Output = Self::U8>
81    + Extend<i16, Output = Self::I32>
82    + Interleave<i16>;
83
84    /// Operations on SIMD vectors with `i8` elements.
85    fn i8(
86        self,
87    ) -> impl SignedIntOps<i8, Simd = Self::I8> + Extend<i8, Output = Self::I16> + Interleave<i8>;
88
89    /// Operations on SIMD vectors with `u8` elements.
90    fn u8(
91        self,
92    ) -> impl IntOps<u8, Simd = Self::U8> + Extend<u8, Output = Self::U16> + Interleave<u8>;
93
94    /// Operations on SIMD vectors with `u16` elements.
95    fn u16(self) -> impl IntOps<u16, Simd = Self::U16>;
96
97    /// Operations on mask vectors for 32-bit lanes.
98    fn m32(self) -> impl MaskOps<Self::M32>;
99
100    /// Operations on mask vectors for 16-bit lanes.
101    fn m16(self) -> impl MaskOps<Self::M16>;
102
103    /// Operations on mask vectors for 8-bit lanes.
104    fn m8(self) -> impl MaskOps<Self::M8>;
105}
106
107/// Get the [`NumOps`] implementation from an [`Isa`] for a given element type.
108///
109/// This trait is useful for writing SIMD operations which are generic over the
110/// element type. It is implemented for all of the element types supported in
111/// SIMD vectors.
112///
113/// # Example
114///
115/// This example shows how to use [`GetNumOps`] to write a vectorized `Sum`
116/// operation.
117///
118/// ```
119/// use rten_simd::{Isa, SimdIterable, SimdOp};
120/// use rten_simd::ops::{GetNumOps, NumOps};
121///
122/// struct Sum<'a, T>(&'a [T]);
123///
124/// impl<T: std::ops::Add<Output=T> + GetNumOps> SimdOp for Sum<'_, T> {
125///   type Output = T;
126///   
127///   #[inline(always)]
128///   fn eval<I: Isa>(self, isa: I) -> Self::Output {
129///     let ops = T::num_ops(isa);
130///
131///     // Build `ops.len()` partial sums in parallel. If the slice length is
132///     // not a multiple of `ops.len()` it will be padded with zeros.
133///     let mut sum = ops.zero();
134///     for chunk in self.0.simd_iter_pad(ops) {
135///         sum = ops.add(sum, chunk);
136///     }
137///
138///     // Horizontally reduce the SIMD vector containing partial sums to a
139///     // single value.
140///     ops.sum(sum)
141///   }
142/// }
143///
144/// let vals: Vec<_> = (1..20i32).collect();
145/// let sum = Sum(&vals).dispatch();
146/// assert_eq!(sum, vals.iter().sum());
147/// ```
148pub trait GetNumOps
149where
150    Self: GetSimd + 'static,
151{
152    /// Return the [`NumOps`] implementation from a SIMD [`Isa`] that provides
153    /// operations on vectors containing elements of type `Self`.
154    fn num_ops<I: Isa>(isa: I) -> impl NumOps<Self, Simd = Self::Simd<I>>;
155}
156
157macro_rules! impl_get_ops {
158    ($trait:ty, $method:ident, $ops:ident, $type:ident) => {
159        impl $trait for $type {
160            fn $method<I: Isa>(isa: I) -> impl $ops<Self, Simd = <Self as GetSimd>::Simd<I>> {
161                isa.$type()
162            }
163        }
164    };
165}
166impl_get_ops!(GetNumOps, num_ops, NumOps, f32);
167impl_get_ops!(GetNumOps, num_ops, NumOps, i16);
168impl_get_ops!(GetNumOps, num_ops, NumOps, i32);
169impl_get_ops!(GetNumOps, num_ops, NumOps, i8);
170impl_get_ops!(GetNumOps, num_ops, NumOps, u16);
171impl_get_ops!(GetNumOps, num_ops, NumOps, u8);
172
173/// Get the [`Simd`] implementation from an [`Isa`] for a given element type.
174///
175/// For example the type `<f32 as GetSimd>::Simd<I>` yields `I::F32` where
176/// `I` is an `Isa`. This trait is used for example by
177/// [`SimdUnaryOp`](crate::SimdUnaryOp) to determine the type of SIMD vector
178/// that corresponds to the element type.
179pub trait GetSimd: Elem {
180    type Simd<I: Isa>: Simd<Elem = Self, Isa = I>;
181}
182
183macro_rules! impl_getsimd {
184    ($ty:ty, $simd:ident) => {
185        impl GetSimd for $ty {
186            type Simd<I: Isa> = I::$simd;
187        }
188    };
189}
190impl_getsimd!(f32, F32);
191impl_getsimd!(i16, I16);
192impl_getsimd!(i32, I32);
193impl_getsimd!(i8, I8);
194impl_getsimd!(u16, U16);
195impl_getsimd!(u8, U8);
196
197/// Get the [`FloatOps`] implementation from an [`Isa`] for a given element type.
198///
199/// This is a specialization of [`GetNumOps`] for float element types.
200pub trait GetFloatOps
201where
202    Self: GetSimd,
203{
204    fn float_ops<I: Isa>(isa: I) -> impl FloatOps<Self, Simd = Self::Simd<I>>;
205}
206impl_get_ops!(GetFloatOps, float_ops, FloatOps, f32);
207
208/// Get the [`IntOps`] implementation from an [`Isa`] for a given element type.
209///
210/// This is a specialization of [`GetNumOps`] for signed integer element types.
211pub trait GetIntOps
212where
213    Self: GetSimd,
214{
215    fn int_ops<I: Isa>(isa: I) -> impl IntOps<Self, Simd = Self::Simd<I>>;
216}
217impl_get_ops!(GetIntOps, int_ops, IntOps, i16);
218impl_get_ops!(GetIntOps, int_ops, IntOps, i32);
219impl_get_ops!(GetIntOps, int_ops, IntOps, i8);
220impl_get_ops!(GetIntOps, int_ops, IntOps, u8);
221impl_get_ops!(GetIntOps, int_ops, IntOps, u16);
222
223/// Get the [`SignedIntOps`] implementation from an [`Isa`] for a given element type.
224///
225/// This is a specialization of [`GetNumOps`] for signed integer element types.
226pub trait GetSignedIntOps
227where
228    Self: GetSimd,
229{
230    fn signed_int_ops<I: Isa>(isa: I) -> impl SignedIntOps<Self, Simd = Self::Simd<I>>;
231}
232impl_get_ops!(GetSignedIntOps, signed_int_ops, SignedIntOps, i32);
233impl_get_ops!(GetSignedIntOps, signed_int_ops, SignedIntOps, i16);
234impl_get_ops!(GetSignedIntOps, signed_int_ops, SignedIntOps, i8);
235
236/// SIMD operations on a [`Mask`] vector.
237///
238/// # Safety
239///
240/// Implementations must ensure they can only be constructed if the
241/// instruction set is supported on the current system.
242pub unsafe trait MaskOps<M: Mask>: Copy {
243    /// Compute `x & y`.
244    fn and(self, x: M, y: M) -> M;
245
246    /// Return true if any lanes are true.
247    fn any(self, x: M) -> bool;
248
249    /// Return true if all lanes are false.
250    fn all_false(self, x: M) -> bool {
251        !self.any(x)
252    }
253
254    /// Return true if all lanes are true.
255    fn all(self, x: M) -> bool;
256}
257
258/// Operations available on all SIMD vector types.
259///
260/// This trait provides core operations available on all SIMD vector types.
261///
262/// - Load from and store into memory
263/// - Creating a new vector filled with zeros or a specific value
264/// - Combining elements from two vectors according to a mask
265/// - Add, subtract and multiply
266/// - Comparison (equality, less than, greater than etc.)
267///
268/// # Safety
269///
270/// Implementations must ensure they can only be constructed if the
271/// instruction set is supported on the current system.
272#[allow(clippy::len_without_is_empty)]
273pub unsafe trait NumOps<T: Elem>: Copy {
274    /// SIMD vector containing lanes of type `T`.
275    type Simd: Simd<Elem = T>;
276
277    /// Convert `x` to an untyped vector of the same width.
278    #[allow(clippy::wrong_self_convention)]
279    fn from_bits(self, x: <<Self::Simd as Simd>::Isa as Isa>::Bits) -> Self::Simd {
280        Self::Simd::from_bits(x)
281    }
282
283    /// Return the number of elements in the vector.
284    fn len(self) -> usize;
285
286    /// Compute `x + y`.
287    fn add(self, x: Self::Simd, y: Self::Simd) -> Self::Simd;
288
289    /// Compute `x - y`.
290    fn sub(self, x: Self::Simd, y: Self::Simd) -> Self::Simd;
291
292    /// Compute `x * y`.
293    fn mul(self, x: Self::Simd, y: Self::Simd) -> Self::Simd;
294
295    /// Create a new vector with all lanes set to zero.
296    fn zero(self) -> Self::Simd {
297        self.splat(T::default())
298    }
299
300    /// Create a new vector with all lanes set to one.
301    fn one(self) -> Self::Simd {
302        self.splat(T::one())
303    }
304
305    /// Compute `a * b + c`.
306    ///
307    /// This will use fused multiply-add instructions if available. For float
308    /// element types, this may use one or two roundings.
309    fn mul_add(self, a: Self::Simd, b: Self::Simd, c: Self::Simd) -> Self::Simd {
310        self.add(self.mul(a, b), c)
311    }
312
313    /// Broadcast the element from one lane of a vector to all lanes of a new
314    /// vector.
315    fn broadcast_lane<const LANE: i32>(self, x: Self::Simd) -> Self::Simd {
316        let val = x.to_array()[LANE as usize];
317        self.splat(val)
318    }
319
320    /// Evaluate a polynomial using Horner's method.
321    ///
322    /// Computes `x * coeffs[0] + x^2 * coeffs[1] ... x^n * coeffs[N]`
323    #[inline]
324    fn poly_eval(self, x: Self::Simd, coeffs: &[Self::Simd]) -> Self::Simd {
325        let mut y = coeffs[coeffs.len() - 1];
326        for i in (0..coeffs.len() - 1).rev() {
327            y = self.mul_add(y, x, coeffs[i]);
328        }
329        self.mul(y, x)
330    }
331
332    /// Return a mask indicating whether elements in `x` are less than `y`.
333    #[inline]
334    fn lt(self, x: Self::Simd, y: Self::Simd) -> <Self::Simd as Simd>::Mask {
335        self.gt(y, x)
336    }
337
338    /// Return a mask indicating whether elements in `x` are less or equal to `y`.
339    #[inline]
340    fn le(self, x: Self::Simd, y: Self::Simd) -> <Self::Simd as Simd>::Mask {
341        self.ge(y, x)
342    }
343
344    /// Return a mask indicating whether elements in `x` are equal to `y`.
345    fn eq(self, x: Self::Simd, y: Self::Simd) -> <Self::Simd as Simd>::Mask;
346
347    /// Return a mask indicating whether elements in `x` are greater or equal to `y`.
348    fn ge(self, x: Self::Simd, y: Self::Simd) -> <Self::Simd as Simd>::Mask;
349
350    /// Return a mask indicating whether elements in `x` are greater than `y`.
351    fn gt(self, x: Self::Simd, y: Self::Simd) -> <Self::Simd as Simd>::Mask;
352
353    /// Return the minimum of `x` and `y` for each lane.
354    fn min(self, x: Self::Simd, y: Self::Simd) -> Self::Simd {
355        self.select(x, y, self.le(x, y))
356    }
357
358    /// Return the maximum of `x` and `y` for each lane.
359    fn max(self, x: Self::Simd, y: Self::Simd) -> Self::Simd {
360        self.select(x, y, self.ge(x, y))
361    }
362
363    /// Clamp values in `x` to minimum and maximum values from corresponding
364    /// lanes in `min` and `max`.
365    fn clamp(self, x: Self::Simd, min: Self::Simd, max: Self::Simd) -> Self::Simd {
366        self.min(self.max(x, min), max)
367    }
368
369    /// Return the bitwise AND of `x` and `y`.
370    fn and(self, x: Self::Simd, y: Self::Simd) -> Self::Simd;
371
372    /// Return the bitwise NOT of `x`.
373    fn not(self, x: Self::Simd) -> Self::Simd;
374
375    /// Return the bitwise OR of `x` and `y`.
376    fn or(self, x: Self::Simd, y: Self::Simd) -> Self::Simd;
377
378    /// Return the bitwise XOR of `x` and `y`.
379    fn xor(self, x: Self::Simd, y: Self::Simd) -> Self::Simd;
380
381    /// Create a new vector with all lanes set to `x`.
382    fn splat(self, x: T) -> Self::Simd;
383
384    /// Reduce the elements in `x` to a single value using `f`, then
385    /// return a new vector with the accumulated value broadcast to each lane.
386    #[inline]
387    fn fold_splat<F: Fn(T, T) -> T>(self, x: Self::Simd, accum: T, f: F) -> Self::Simd {
388        let reduced = x.to_array().into_iter().fold(accum, f);
389        self.splat(reduced)
390    }
391
392    /// Return a mask with the first `n` lanes set to true.
393    fn first_n_mask(self, n: usize) -> <Self::Simd as Simd>::Mask;
394
395    /// Load the first `self.len()` elements from a slice into a vector.
396    ///
397    /// Panics if `xs.len() < self.len()`.
398    #[inline]
399    #[track_caller]
400    fn load(self, xs: &[T]) -> Self::Simd {
401        assert!(
402            xs.len() >= self.len(),
403            "slice length {} too short for SIMD vector width {}",
404            xs.len(),
405            self.len()
406        );
407        unsafe { self.load_ptr(xs.as_ptr()) }
408    }
409
410    /// Load `N` vectors from consecutive sub-slices of `xs`.
411    ///
412    /// Panics if `xs.len() < self.len() * N`.
413    #[inline]
414    #[track_caller]
415    fn load_many<const N: usize>(self, xs: &[T]) -> [Self::Simd; N] {
416        let v_len = self.len();
417        assert!(
418            xs.len() >= v_len * N,
419            "slice length {} too short for {} * SIMD vector width {}",
420            xs.len(),
421            N,
422            v_len
423        );
424        // Safety: `xs.add(i * v_len)` points to at least `v_len` elements.
425        std::array::from_fn(|i| unsafe { self.load_ptr(xs.as_ptr().add(i * v_len)) })
426    }
427
428    /// Load elements from `xs` into a vector.
429    ///
430    /// If the vector length exceeds `xs.len()`, the tail is padded with zeros.
431    ///
432    /// Returns the padded vector and a mask of the lanes which were set.
433    #[inline]
434    fn load_pad(self, xs: &[T]) -> (Self::Simd, <Self::Simd as Simd>::Mask) {
435        let n = xs.len().min(self.len());
436        let mask = self.first_n_mask(n);
437
438        // Safety: `xs.add(i)` is valid for all positions where mask is set
439        let vec = unsafe { self.load_ptr_mask(xs.as_ptr(), mask) };
440
441        (vec, mask)
442    }
443
444    /// Load vector of elements from `ptr`.
445    ///
446    /// `ptr` is not required to have any particular alignment.
447    ///
448    /// # Safety
449    ///
450    /// `ptr` must point to `self.len()` initialized elements of type `T`.
451    unsafe fn load_ptr(self, ptr: *const T) -> Self::Simd;
452
453    /// Load vector elements from `ptr` using a mask.
454    ///
455    /// `ptr` is not required to have any particular alignment.
456    ///
457    /// # Safety
458    ///
459    /// For each mask position `i` which is true, `ptr.add(i)` must point to
460    /// an initialized element of type `T`.
461    unsafe fn load_ptr_mask(self, ptr: *const T, mask: <Self::Simd as Simd>::Mask) -> Self::Simd;
462
463    /// Select elements from `x` or `y` according to a mask.
464    ///
465    /// Elements are selected from `x` where the corresponding mask element
466    /// is one or `y` if zero.
467    fn select(self, x: Self::Simd, y: Self::Simd, mask: <Self::Simd as Simd>::Mask) -> Self::Simd;
468
469    /// Store the values in this vector to a memory location.
470    ///
471    /// # Safety
472    ///
473    /// `ptr` must point to `self.len()` elements.
474    unsafe fn store_ptr(self, x: Self::Simd, ptr: *mut T);
475
476    /// Store `x` into the first `self.len()` elements of `xs`.
477    #[inline]
478    fn store(self, x: Self::Simd, xs: &mut [T]) {
479        assert!(xs.len() >= self.len());
480        unsafe { self.store_ptr(x, xs.as_mut_ptr()) }
481    }
482
483    /// Store `x` into the first `self.len()` elements of `xs`.
484    ///
485    /// This is a variant of [`store`](NumOps::store) which takes an
486    /// uninitialized slice as input and returns the initialized portion of the
487    /// slice.
488    #[inline]
489    fn store_uninit(self, x: Self::Simd, xs: &mut [MaybeUninit<T>]) -> &mut [T] {
490        let len = self.len();
491        let xs_ptr = xs.as_mut_ptr() as *mut T;
492        assert!(xs.len() >= len);
493        unsafe {
494            self.store_ptr(x, xs_ptr);
495
496            // Safety: `store_ptr` initialized `len` elements of `xs`.
497            std::slice::from_raw_parts_mut(xs_ptr, len)
498        }
499    }
500
501    /// Store the values in this vector to a memory location, where the
502    /// corresponding mask element is set.
503    ///
504    /// # Safety
505    ///
506    /// For each position `i` in the mask which is true, `ptr.add(i)` must point
507    /// to a valid element of type `Self::Elem`.
508    unsafe fn store_ptr_mask(self, x: Self::Simd, ptr: *mut T, mask: <Self::Simd as Simd>::Mask);
509
510    /// Pre-fetch the cache line containing `ptr` for reading.
511    fn prefetch(self, ptr: *const T) {
512        // Default implementation does nothing
513        let _ = ptr;
514    }
515
516    /// Pre-fetch the cache line containing `ptr` for writing.
517    fn prefetch_write(self, ptr: *mut T) {
518        // Default implementation does nothing
519        let _ = ptr;
520    }
521
522    /// Horizontally sum the elements in a vector.
523    ///
524    /// If the sum overflows, it will wrap. This choice was made to enable
525    /// consistency between native intrinsics for horizontal addition and the
526    /// generic implementation.
527    fn sum(self, x: Self::Simd) -> T {
528        let mut sum = T::default();
529        for elem in x.to_array() {
530            sum = sum.wrapping_add(elem);
531        }
532        sum
533    }
534}
535
536/// Operations available on SIMD vectors with float elements.
537pub trait FloatOps<T: Elem>: NumOps<T> {
538    /// Integer SIMD vector of the same bit-width as this vector.
539    type Int: Simd;
540
541    /// Compute x / y
542    fn div(self, x: Self::Simd, y: Self::Simd) -> Self::Simd;
543
544    /// Compute 1. / x
545    fn reciprocal(self, x: Self::Simd) -> Self::Simd {
546        self.div(self.one(), x)
547    }
548
549    /// Compute `-x`
550    fn neg(self, x: Self::Simd) -> Self::Simd {
551        self.sub(self.zero(), x)
552    }
553
554    /// Compute the absolute value of `x`
555    fn abs(self, x: Self::Simd) -> Self::Simd {
556        self.select(self.neg(x), x, self.lt(x, self.zero()))
557    }
558
559    /// Round `x` to the nearest integer value, with ties to even.
560    ///
561    /// This is like [`f32::round_ties_even`].
562    fn round_ties_even(self, x: Self::Simd) -> Self::Simd;
563
564    /// Compute `c - a * b`.
565    fn mul_sub_from(self, a: Self::Simd, b: Self::Simd, c: Self::Simd) -> Self::Simd {
566        self.sub(c, self.mul(a, b))
567    }
568
569    /// Convert each lane to an integer of the same width, rounding towards zero.
570    fn to_int_trunc(self, x: Self::Simd) -> Self::Int;
571
572    /// Convert each lane to an integer of the same width, rounding to nearest
573    /// with ties to even.
574    fn to_int_round(self, x: Self::Simd) -> Self::Int;
575}
576
577/// Operations on SIMD vectors with integer elements.
578pub trait IntOps<T: Elem>: NumOps<T> {
579    /// Shift each lane in `x` left by `SHIFT` bits.
580    fn shift_left<const SHIFT: i32>(self, x: Self::Simd) -> Self::Simd;
581
582    /// Shift each lane in `x` right by `SHIFT` bits.
583    ///
584    /// For signed integer types this is an arithmetic shift, so shifting a
585    /// negative number right will preserve the sign, like the `>>` operator.
586    fn shift_right<const SHIFT: i32>(self, x: Self::Simd) -> Self::Simd;
587}
588
589/// Operations on SIMD vectors with signed integer elements.
590pub trait SignedIntOps<T: Elem>: IntOps<T> {
591    /// Compute the absolute value of `x`
592    fn abs(self, x: Self::Simd) -> Self::Simd {
593        self.select(self.neg(x), x, self.lt(x, self.zero()))
594    }
595
596    /// Return `-x`.
597    fn neg(self, x: Self::Simd) -> Self::Simd {
598        self.sub(self.zero(), x)
599    }
600}
601
602/// Widen lanes to a type with twice the width.
603///
604/// For integer types, the extended type has the same signed-ness.
605pub trait Extend<T: Elem>: NumOps<T> {
606    /// SIMD vector type with elements that have twice the bit-width of
607    /// those in `Self::SIMD`.
608    type Output;
609
610    /// Extend each lane to a type with twice the width.
611    ///
612    /// Returns a tuple containing the extended low and high half of the input.
613    fn extend(self, x: Self::Simd) -> (Self::Output, Self::Output);
614}
615
616/// Interleave elements from the low or high halves of two vectors to form a
617/// new vector.
618pub trait Interleave<T: Elem>: NumOps<T> {
619    /// Interleave elements from the low halves of two vectors.
620    fn interleave_low(self, a: Self::Simd, b: Self::Simd) -> Self::Simd;
621
622    /// Interleave elements from the high halves of two vectors.
623    fn interleave_high(self, a: Self::Simd, b: Self::Simd) -> Self::Simd;
624}
625
626/// Concatenate elements from the low or high halves of two vectors to form a
627/// new vector.
628pub trait Concat<T: Elem>: NumOps<T> {
629    /// Concatenate elements from the low halves of two vectors.
630    fn concat_low(self, a: Self::Simd, b: Self::Simd) -> Self::Simd;
631
632    /// Concatenate elements from the high halves of two vectors.
633    fn concat_high(self, a: Self::Simd, b: Self::Simd) -> Self::Simd;
634}
635
636/// Convert each lane to a float with the same bit width.
637pub trait ToFloat<T: Elem>: NumOps<T> {
638    type Output;
639
640    fn to_float(self, x: Self::Simd) -> Self::Output;
641}
642
643/// Narrow lanes to one with half the bit-width, using truncation.
644///
645/// For integer types, the narrowed type has the same signed-ness.
646#[cfg(target_arch = "x86_64")]
647pub(crate) trait Narrow<S: Simd> {
648    type Output;
649
650    /// Truncate each lane in a pair of vectors to one with half the bit-width.
651    ///
652    /// Returns a vector containing the concatenation of the narrowed lanes
653    /// from `low` followed by the narrowed lanes from `high`.
654    fn narrow_truncate(self, low: S, high: S) -> Self::Output;
655}
656
657/// Narrow lanes to one with half the bit-width, using saturation.
658///
659/// Conceptually, this converts each element from `S1::Elem` to `S2::Elem` using
660/// `x.clamp(S2::Elem::MIN as S1::Elem, S2::Elem::MAX as S1::Elem) as S2::Elem`.
661pub trait NarrowSaturate<T: Elem, U: Elem>: NumOps<T> {
662    type Output: Simd<Elem = U>;
663
664    /// Narrow each lane in a pair of vectors to one with half the bit-width.
665    ///
666    /// Returns a vector containing the concatenation of the narrowed lanes
667    /// from `low` followed by the narrowed lanes from `high`.
668    fn narrow_saturate(self, low: Self::Simd, high: Self::Simd) -> Self::Output;
669}
670
671#[cfg(test)]
672mod tests {
673    use crate::elem::WrappingAdd;
674    use crate::ops::{
675        Concat, Extend, FloatOps, IntOps, Interleave, MaskOps, NarrowSaturate, NumOps,
676        SignedIntOps, ToFloat,
677    };
678    use crate::{Isa, Mask, Simd, SimdOp, assert_simd_eq, assert_simd_ne, test_simd_op};
679
680    // Generate tests for operations available on all numeric types.
681    macro_rules! test_num_ops {
682        ($modname:ident, $elem:ident, $mask_elem:ident) => {
683            mod $modname {
684                use super::{
685                    Isa, MaskOps, NumOps, Simd, SimdOp, WrappingAdd, assert_simd_eq,
686                    assert_simd_ne, test_simd_op,
687                };
688
689                #[test]
690                fn test_load_store() {
691                    test_simd_op!(isa, {
692                        let ops = isa.$elem();
693
694                        let src: Vec<_> = (0..ops.len() * 4).map(|x| x as $elem).collect();
695                        let mut dst = vec![0 as $elem; src.len()];
696
697                        for (src_chunk, dst_chunk) in
698                            src.chunks(ops.len()).zip(dst.chunks_mut(ops.len()))
699                        {
700                            let x = ops.load(src_chunk);
701                            ops.store(x, dst_chunk);
702                        }
703
704                        assert_eq!(dst, src);
705                    })
706                }
707
708                #[test]
709                fn test_store_uninit() {
710                    test_simd_op!(isa, {
711                        let ops = isa.$elem();
712
713                        let src: Vec<_> = (0..ops.len() + 3).map(|x| x as $elem).collect();
714                        let mut dest = Vec::with_capacity(src.len());
715
716                        let x = ops.load(&src);
717
718                        let init = ops.store_uninit(x, dest.spare_capacity_mut());
719                        assert_eq!(init, &src[0..ops.len()]);
720                    })
721                }
722
723                #[test]
724                fn test_load_many() {
725                    test_simd_op!(isa, {
726                        let ops = isa.$elem();
727
728                        let src: Vec<_> = (0..ops.len() * 2).map(|x| x as $elem).collect();
729
730                        let xs = ops.load_many::<2>(&src);
731                        assert_simd_eq!(xs[0], ops.load(&src));
732                        assert_simd_eq!(xs[1], ops.load(&src[ops.len()..]));
733                    })
734                }
735
736                #[test]
737                fn test_load_pad() {
738                    test_simd_op!(isa, {
739                        let ops = isa.$elem();
740
741                        // Array which is shorter than vector length for all ISAs.
742                        let src = [0, 1, 2].map(|x| x as $elem);
743
744                        let (vec, _mask) = ops.load_pad(&src);
745                        let vec_array = vec.to_array();
746                        let vec_slice = vec_array.as_ref();
747
748                        assert_eq!(&vec_slice[..src.len()], &src);
749                        for i in ops.len()..vec_slice.len() {
750                            assert_eq!(vec_array[i], 0 as $elem);
751                        }
752                    })
753                }
754
755                #[test]
756                fn test_bin_ops() {
757                    test_simd_op!(isa, {
758                        let ops = isa.$elem();
759
760                        let a = 2 as $elem;
761                        let b = 3 as $elem;
762
763                        let x = ops.splat(a);
764                        let y = ops.splat(b);
765
766                        // Add
767                        let expected = ops.splat(a + b);
768                        let actual = ops.add(x, y);
769                        assert_simd_eq!(actual, expected);
770
771                        // Sub
772                        let expected = ops.splat(b - a);
773                        let actual = ops.sub(y, x);
774                        assert_simd_eq!(actual, expected);
775
776                        // Mul
777                        let expected = ops.splat(a * b);
778                        let actual = ops.mul(x, y);
779                        assert_simd_eq!(actual, expected);
780                    })
781                }
782
783                #[test]
784                fn test_cmp_ops() {
785                    test_simd_op!(isa, {
786                        let ops = isa.$elem();
787                        let mo = isa.$mask_elem();
788
789                        let x = ops.splat(1 as $elem);
790                        let y = ops.splat(2 as $elem);
791
792                        assert!(mo.all(ops.eq(x, x)));
793                        assert!(mo.all_false(ops.eq(x, y)));
794                        assert!(mo.all(ops.le(x, x)));
795                        assert!(mo.all(ops.le(x, y)));
796                        assert!(mo.all_false(ops.le(y, x)));
797                        assert!(mo.all(ops.ge(x, x)));
798                        assert!(mo.all_false(ops.ge(x, y)));
799                        assert!(mo.all_false(ops.gt(x, y)));
800                        assert!(mo.all(ops.gt(y, x)));
801                    })
802                }
803
804                #[test]
805                fn test_mul_add() {
806                    test_simd_op!(isa, {
807                        let ops = isa.$elem();
808
809                        let a = ops.splat(2 as $elem);
810                        let b = ops.splat(3 as $elem);
811                        let c = ops.splat(4 as $elem);
812
813                        let actual = ops.mul_add(a, b, c);
814                        let expected = ops.splat(((2. * 3.) + 4.) as $elem);
815
816                        assert_simd_eq!(actual, expected);
817                    })
818                }
819
820                #[test]
821                fn test_min_max() {
822                    test_simd_op!(isa, {
823                        let ops = isa.$elem();
824
825                        let x = ops.splat(3 as $elem);
826
827                        // Min
828                        let y_min = ops.min(x, ops.splat(2 as $elem));
829                        let y_min_2 = ops.min(ops.splat(2 as $elem), x);
830                        assert_simd_eq!(y_min, y_min_2);
831                        assert_simd_eq!(y_min, ops.splat(2 as $elem));
832
833                        // Max
834                        let y_max = ops.max(x, ops.splat(4 as $elem));
835                        let y_max_2 = ops.max(ops.splat(4 as $elem), x);
836                        assert_simd_eq!(y_max, y_max_2);
837                        assert_simd_eq!(y_max, ops.splat(4 as $elem));
838
839                        // Clamp
840                        let y_clamped = ops.clamp(x, ops.splat(0 as $elem), ops.splat(4 as $elem));
841                        assert_simd_eq!(y_clamped, ops.splat(3 as $elem));
842                    })
843                }
844
845                #[test]
846                fn test_and() {
847                    test_simd_op!(isa, {
848                        let ops = isa.$elem();
849                        let zeros = ops.zero();
850                        let ones = ops.not(zeros);
851
852                        // Cast to bits here because all-ones is a NaN if elements
853                        // are floats, and NaNs are not equal to themselves.
854                        assert_simd_eq!(ops.and(zeros, zeros).to_bits(), zeros.to_bits());
855                        assert_simd_eq!(ops.and(zeros, ones).to_bits(), zeros.to_bits());
856                        assert_simd_eq!(ops.and(ones, zeros).to_bits(), zeros.to_bits());
857                        assert_simd_eq!(ops.and(ones, ones).to_bits(), ones.to_bits());
858                    })
859                }
860
861                #[test]
862                fn test_not() {
863                    test_simd_op!(isa, {
864                        let ops = isa.$elem();
865                        let zeros = ops.zero();
866                        let ones = ops.not(zeros);
867                        assert_simd_ne!(zeros, ones);
868
869                        let zeros_2 = ops.not(ones);
870                        assert_simd_eq!(zeros_2, zeros);
871                    })
872                }
873
874                #[test]
875                fn test_or() {
876                    test_simd_op!(isa, {
877                        let ops = isa.$elem();
878                        let zeros = ops.zero();
879                        let ones = ops.not(zeros);
880
881                        // Cast to bits here because all-ones is a NaN if elements
882                        // are floats, and NaNs are not equal to themselves.
883                        assert_simd_eq!(ops.or(zeros, zeros).to_bits(), zeros.to_bits());
884                        assert_simd_eq!(ops.or(zeros, ones).to_bits(), ones.to_bits());
885                        assert_simd_eq!(ops.or(ones, zeros).to_bits(), ones.to_bits());
886                        assert_simd_eq!(ops.or(ones, ones).to_bits(), ones.to_bits());
887                    })
888                }
889
890                #[test]
891                fn test_xor() {
892                    test_simd_op!(isa, {
893                        let ops = isa.$elem();
894
895                        let zeros = ops.zero();
896                        let ones = ops.not(zeros);
897
898                        // Cast to bits here because all-ones is a NaN if the
899                        // element type is a float, and NaNs are not equal to
900                        // themselves.
901                        assert_simd_eq!(ops.xor(zeros, zeros).to_bits(), zeros.to_bits());
902                        assert_simd_eq!(ops.xor(ones, ones).to_bits(), zeros.to_bits());
903                        assert_simd_eq!(ops.xor(zeros, ones).to_bits(), ones.to_bits());
904                        assert_simd_eq!(ops.xor(ones, zeros).to_bits(), ones.to_bits());
905                    })
906                }
907
908                #[test]
909                fn test_sum() {
910                    test_simd_op!(isa, {
911                        let ops = isa.$elem();
912
913                        let vec: Vec<_> = (0..ops.len()).map(|x| x as $elem).collect();
914                        let expected = vec
915                            .iter()
916                            .fold(0 as $elem, |sum, x| WrappingAdd::wrapping_add(sum, *x));
917
918                        let x = ops.load(&vec);
919                        let y = ops.sum(x);
920
921                        assert_eq!(y, expected);
922                    })
923                }
924
925                #[test]
926                fn test_poly_eval() {
927                    test_simd_op!(isa, {
928                        let ops = isa.$elem();
929
930                        let coeffs = [2, 3, 4].map(|x| x as $elem);
931                        let x = 2 as $elem;
932                        let y = ops.poly_eval(ops.splat(x), &coeffs.map(|c| ops.splat(c)));
933
934                        let expected =
935                            (x * coeffs[0]) + (x * x * coeffs[1]) + (x * x * x * coeffs[2]);
936                        assert_simd_eq!(y, ops.splat(expected));
937                    })
938                }
939
940                #[test]
941                fn test_broadcast_lane() {
942                    test_simd_op!(isa, {
943                        let ops = isa.$elem();
944
945                        let vec: Vec<_> = (0..ops.len()).map(|x| x as $elem).collect();
946                        let xs = ops.load(&vec);
947
948                        let ys = ops.broadcast_lane::<0>(xs);
949                        assert_simd_eq!(ops.splat(0 as $elem), ys);
950
951                        let ys = ops.broadcast_lane::<1>(xs);
952                        assert_simd_eq!(ops.splat(1 as $elem), ys);
953
954                        let ys = ops.broadcast_lane::<2>(xs);
955                        assert_simd_eq!(ops.splat(2 as $elem), ys);
956
957                        let ys = ops.broadcast_lane::<3>(xs);
958                        assert_simd_eq!(ops.splat(3 as $elem), ys);
959                    });
960                }
961            }
962        };
963    }
964
965    test_num_ops!(num_ops_f32, f32, m32);
966    test_num_ops!(num_ops_i32, i32, m32);
967    test_num_ops!(num_ops_i16, i16, m16);
968    test_num_ops!(num_ops_i8, i8, m8);
969    test_num_ops!(num_ops_u8, u8, m8);
970    test_num_ops!(num_ops_u16, u16, m16);
971
972    // Test that x8 multiply truncates result as expected.
973    #[test]
974    fn test_i8_mul_truncate() {
975        test_simd_op!(isa, {
976            let ops = isa.i8();
977
978            let x = 17i8;
979            let y = 19i8;
980
981            let x_vec = ops.splat(x);
982            let y_vec = ops.splat(y);
983            let expected = ops.splat(x.wrapping_mul(y));
984            let actual = ops.mul(x_vec, y_vec);
985
986            assert_simd_eq!(actual, expected);
987        })
988    }
989
990    #[test]
991    fn test_u8_mul_truncate() {
992        test_simd_op!(isa, {
993            let ops = isa.u8();
994
995            let x = 17u8;
996            let y = 19u8;
997
998            let x_vec = ops.splat(x);
999            let y_vec = ops.splat(y);
1000            let expected = ops.splat(x.wrapping_mul(y));
1001            let actual = ops.mul(x_vec, y_vec);
1002
1003            assert_simd_eq!(actual, expected);
1004        })
1005    }
1006
1007    // Generate tests for operations available on all float types.
1008    macro_rules! test_float_ops {
1009        ($modname:ident, $elem:ident, $int_elem:ident) => {
1010            mod $modname {
1011                use super::{FloatOps, Isa, NumOps, Simd, SimdOp, assert_simd_eq, test_simd_op};
1012
1013                #[test]
1014                fn test_div() {
1015                    test_simd_op!(isa, {
1016                        let ops = isa.$elem();
1017
1018                        let x = ops.splat(1.);
1019                        let y = ops.splat(2.);
1020                        let expected = ops.splat(0.5);
1021                        let actual = ops.div(x, y);
1022                        assert_simd_eq!(actual, expected);
1023                    })
1024                }
1025
1026                #[test]
1027                fn test_reciprocal() {
1028                    test_simd_op!(isa, {
1029                        let ops = isa.$elem();
1030
1031                        let vals = [-5., -2., 2., 5.];
1032                        for v in vals {
1033                            let x = ops.splat(v);
1034                            let y = ops.reciprocal(x);
1035                            let expected = ops.splat(1. / v);
1036                            assert_simd_eq!(y, expected);
1037                        }
1038                    })
1039                }
1040
1041                #[test]
1042                fn test_abs() {
1043                    test_simd_op!(isa, {
1044                        let ops = isa.$elem();
1045
1046                        let vals = [-1., 0., 1.];
1047                        for v in vals {
1048                            let x = ops.splat(v);
1049                            let y = ops.abs(x);
1050                            let expected = ops.splat(v.abs());
1051                            assert_simd_eq!(y, expected);
1052                        }
1053                    })
1054                }
1055
1056                #[test]
1057                fn test_neg() {
1058                    test_simd_op!(isa, {
1059                        let ops = isa.$elem();
1060
1061                        let x = ops.splat(3 as $elem);
1062
1063                        let expected = ops.splat(-3 as $elem);
1064                        let actual = ops.neg(x);
1065                        assert_simd_eq!(actual, expected);
1066                    })
1067                }
1068
1069                #[test]
1070                fn test_mul_sub_from() {
1071                    test_simd_op!(isa, {
1072                        let ops = isa.$elem();
1073
1074                        let a = ops.splat(2 as $elem);
1075                        let b = ops.splat(3 as $elem);
1076                        let c = ops.splat(4 as $elem);
1077
1078                        let actual = ops.mul_sub_from(a, b, c);
1079                        let expected = ops.splat((-(2. * 3.) + 4.) as $elem);
1080
1081                        assert_simd_eq!(actual, expected);
1082                    })
1083                }
1084
1085                #[test]
1086                fn test_round_ties_even() {
1087                    test_simd_op!(isa, {
1088                        let ops = isa.$elem();
1089
1090                        let x = ops.splat(3.5 as $elem);
1091
1092                        let expected = ops.splat(4 as $elem);
1093                        let actual = ops.round_ties_even(x);
1094                        assert_simd_eq!(actual, expected);
1095                    })
1096                }
1097
1098                #[test]
1099                fn test_to_int_trunc() {
1100                    test_simd_op!(isa, {
1101                        let ops = isa.$elem();
1102
1103                        let x = ops.splat(12.345);
1104                        let y = ops.to_int_trunc(x);
1105                        let expected = isa.$int_elem().splat(12);
1106                        assert_simd_eq!(y, expected);
1107                    })
1108                }
1109            }
1110        };
1111    }
1112
1113    test_float_ops!(float_ops_f32, f32, i32);
1114
1115    // Generate tests for operations available on unsigned integer types.
1116    macro_rules! test_unsigned_int_ops {
1117        ($modname:ident, $elem:ident) => {
1118            mod $modname {
1119                use super::{IntOps, Isa, NumOps, Simd, SimdOp, assert_simd_eq, test_simd_op};
1120
1121                #[test]
1122                fn test_shift_left() {
1123                    test_simd_op!(isa, {
1124                        let ops = isa.$elem();
1125
1126                        let x = ops.splat(42);
1127                        let y = ops.shift_left::<1>(x);
1128                        let expected = ops.splat(42 << 1);
1129                        assert_simd_eq!(y, expected);
1130                    })
1131                }
1132
1133                #[test]
1134                fn test_shift_right() {
1135                    test_simd_op!(isa, {
1136                        let ops = isa.$elem();
1137
1138                        let x = ops.splat(42);
1139                        let y = ops.shift_right::<1>(x);
1140                        let expected = ops.splat(42 >> 1);
1141                        assert_simd_eq!(y, expected);
1142                    })
1143                }
1144            }
1145        };
1146    }
1147
1148    test_unsigned_int_ops!(uint_ops_u16, u16);
1149
1150    // Generate tests for operations available on signed integer types.
1151    macro_rules! test_signed_int_ops {
1152        ($modname:ident, $elem:ident) => {
1153            mod $modname {
1154                use super::{
1155                    IntOps, Isa, NumOps, SignedIntOps, Simd, SimdOp, assert_simd_eq, test_simd_op,
1156                };
1157
1158                #[test]
1159                fn test_abs() {
1160                    test_simd_op!(isa, {
1161                        let ops = isa.$elem();
1162
1163                        let vals = [-1, 0, 1];
1164                        for v in vals {
1165                            let x = ops.splat(v);
1166                            let y = ops.abs(x);
1167                            let expected = ops.splat(v.abs());
1168                            assert_simd_eq!(y, expected);
1169                        }
1170                    })
1171                }
1172
1173                // Add / Sub / Mul with a negative argument.
1174                #[test]
1175                fn test_bin_ops_neg() {
1176                    test_simd_op!(isa, {
1177                        let ops = isa.$elem();
1178
1179                        let a = -2 as $elem;
1180                        let b = 3 as $elem;
1181
1182                        let x = ops.splat(a);
1183                        let y = ops.splat(b);
1184
1185                        // Add
1186                        let expected = ops.splat(a + b);
1187                        let actual = ops.add(x, y);
1188                        assert_simd_eq!(actual, expected);
1189
1190                        // Sub
1191                        let expected = ops.splat(b - a);
1192                        let actual = ops.sub(y, x);
1193                        assert_simd_eq!(actual, expected);
1194
1195                        // Mul
1196                        let expected = ops.splat(a * b);
1197                        let actual = ops.mul(x, y);
1198                        assert_simd_eq!(actual, expected);
1199                    })
1200                }
1201
1202                #[test]
1203                fn test_shift_left() {
1204                    test_simd_op!(isa, {
1205                        let ops = isa.$elem();
1206
1207                        let x = ops.splat(42);
1208                        let y = ops.shift_left::<1>(x);
1209                        let expected = ops.splat(42 << 1);
1210                        assert_simd_eq!(y, expected);
1211                    })
1212                }
1213
1214                #[test]
1215                fn test_shift_right() {
1216                    test_simd_op!(isa, {
1217                        let ops = isa.$elem();
1218
1219                        let x = ops.splat(42);
1220                        let y = ops.shift_right::<1>(x);
1221                        let expected = ops.splat(42 >> 1);
1222                        assert_simd_eq!(y, expected);
1223
1224                        // `shift_right` is an arithmetic right shift, so it
1225                        // preserves the sign.
1226                        let x = ops.splat(-128);
1227                        let y = ops.shift_right::<1>(x);
1228                        let expected = ops.splat(-64);
1229                        assert_simd_eq!(y, expected);
1230                    })
1231                }
1232
1233                #[test]
1234                fn test_neg() {
1235                    test_simd_op!(isa, {
1236                        let ops = isa.$elem();
1237
1238                        let x = ops.splat(3 as $elem);
1239
1240                        let expected = ops.splat(-3 as $elem);
1241                        let actual = ops.neg(x);
1242                        assert_simd_eq!(actual, expected);
1243                    })
1244                }
1245            }
1246        };
1247    }
1248
1249    test_signed_int_ops!(int_ops_i32, i32);
1250    test_signed_int_ops!(int_ops_i16, i16);
1251    test_signed_int_ops!(int_ops_i8, i8);
1252
1253    // For small positive values, signed comparison ops will work on unsigned
1254    // values. Make sure we really are using unsigned comparison.
1255    #[test]
1256    fn test_cmp_gt_ge_u16() {
1257        test_simd_op!(isa, {
1258            let ops = isa.u16();
1259            let m16 = isa.m16();
1260
1261            let x = ops.splat(i16::MAX as u16);
1262            let y = ops.splat(i16::MAX as u16 + 1);
1263
1264            assert!(m16.all(ops.gt(y, x)));
1265            assert!(m16.all(ops.ge(y, x)));
1266        });
1267    }
1268
1269    #[test]
1270    fn test_cmp_gt_ge_u8() {
1271        test_simd_op!(isa, {
1272            let ops = isa.u8();
1273            let m8 = isa.m8();
1274
1275            let x = ops.splat(i8::MAX as u8);
1276            let y = ops.splat(i8::MAX as u8 + 1);
1277
1278            assert!(m8.all(ops.gt(y, x)));
1279            assert!(m8.all(ops.ge(y, x)));
1280        });
1281    }
1282
1283    macro_rules! test_mask_ops {
1284        ($elem_type:ident, $mask_type:ident) => {
1285            test_simd_op!(isa, {
1286                let ops = isa.$elem_type();
1287                let mask_ops = isa.$mask_type();
1288
1289                // First-n mask
1290                let ones = ops.first_n_mask(ops.len());
1291                let zeros = ops.first_n_mask(0);
1292                let first = ops.first_n_mask(1);
1293
1294                // Bitwise and
1295                assert_simd_eq!(mask_ops.and(ones, ones), ones);
1296                assert_simd_eq!(mask_ops.and(first, ones), first);
1297                assert_simd_eq!(mask_ops.and(first, zeros), zeros);
1298
1299                // Any
1300                assert!(mask_ops.any(ones));
1301                assert!(mask_ops.any(first));
1302                assert!(!mask_ops.any(zeros));
1303
1304                // All
1305                assert!(mask_ops.all(ones));
1306                assert!(!mask_ops.all(zeros));
1307                assert!(!mask_ops.all(first));
1308
1309                // All false
1310                assert!(mask_ops.all_false(zeros));
1311                assert!(!mask_ops.all_false(ones));
1312                assert!(!mask_ops.all_false(first));
1313            });
1314        };
1315    }
1316
1317    #[test]
1318    fn test_mask_ops_m32() {
1319        test_mask_ops!(i32, m32);
1320    }
1321
1322    #[test]
1323    fn test_mask_ops_m16() {
1324        test_mask_ops!(i16, m16);
1325    }
1326
1327    #[test]
1328    fn test_mask_ops_m8() {
1329        test_mask_ops!(i8, m8);
1330    }
1331
1332    macro_rules! test_narrow_saturate {
1333        ($test_name:ident, $src:ident, $dest:ident) => {
1334            #[test]
1335            fn $test_name() {
1336                test_simd_op!(isa, {
1337                    let ops = isa.$src();
1338
1339                    let src: Vec<$src> = (0..ops.len() * 2).map(|x| x as $src).collect();
1340                    let expected: Vec<$dest> = src
1341                        .iter()
1342                        .map(|&x| x.clamp($dest::MIN as $src, $dest::MAX as $src) as $dest)
1343                        .collect();
1344
1345                    let x_low = ops.load(&src[..ops.len()]);
1346                    let x_high = ops.load(&src[ops.len()..]);
1347                    let y = ops.narrow_saturate(x_low, x_high);
1348
1349                    assert_eq!(y.to_array().as_ref(), expected);
1350                });
1351            }
1352        };
1353    }
1354
1355    test_narrow_saturate!(test_narrow_i32_i16, i32, i16);
1356    test_narrow_saturate!(test_narrow_u16_u8, i16, u8);
1357
1358    macro_rules! test_extend {
1359        ($test_name:ident, $src:ident, $dest:ident) => {
1360            #[test]
1361            fn $test_name() {
1362                test_simd_op!(isa, {
1363                    let ops = isa.$src();
1364                    let dst_ops = isa.$dest();
1365
1366                    let src: Vec<$src> = (0..ops.len()).map(|x| x as $src).collect();
1367                    let expected: Vec<$dest> = src.iter().map(|&x| x as $dest).collect();
1368
1369                    let x = ops.load(&src);
1370                    let (y_low, y_high) = ops.extend(x);
1371                    assert_eq!(y_low.to_array().as_ref(), &expected[..dst_ops.len()]);
1372                    assert_eq!(y_high.to_array().as_ref(), &expected[dst_ops.len()..]);
1373                });
1374            }
1375        };
1376    }
1377    test_extend!(test_extend_i8_i16, i8, i16);
1378    test_extend!(test_extend_i16_i32, i16, i32);
1379    test_extend!(test_extend_u8_u16, u8, u16);
1380
1381    macro_rules! test_interleave {
1382        ($test_name:ident, $elem:ident) => {
1383            #[test]
1384            fn $test_name() {
1385                test_simd_op!(isa, {
1386                    let ops = isa.$elem();
1387
1388                    let even: Vec<_> = (0..ops.len()).map(|x| x as $elem * 2).collect();
1389                    let even = ops.load(&even);
1390
1391                    let odd: Vec<_> = (0..ops.len()).map(|x| 1 + (x as $elem * 2)).collect();
1392                    let odd = ops.load(&odd);
1393
1394                    let expected_low: Vec<_> = (0..ops.len()).map(|x| x as $elem).collect();
1395                    let expected_high: Vec<_> = (0..ops.len())
1396                        .map(|x| ops.len() as $elem + x as $elem)
1397                        .collect();
1398
1399                    let y_low = ops.interleave_low(even, odd);
1400                    let y_high = ops.interleave_high(even, odd);
1401                    assert_eq!(y_low.to_array().as_ref(), expected_low);
1402                    assert_eq!(y_high.to_array().as_ref(), expected_high);
1403                });
1404            }
1405        };
1406    }
1407    test_interleave!(test_interleave_i16, i16);
1408    test_interleave!(test_interleave_i8, i8);
1409    test_interleave!(test_interleave_u8, u8);
1410
1411    #[test]
1412    fn test_concat_i32() {
1413        test_simd_op!(isa, {
1414            let ops = isa.i32();
1415            let src: Vec<_> = (0..ops.len() * 2).map(|x| x as i32).collect();
1416            let src_a = &src[..ops.len()];
1417            let src_b = &src[ops.len()..];
1418
1419            let half_len = ops.len() / 2;
1420            let expected_low: Vec<_> = (0..ops.len())
1421                .map(|i| {
1422                    if i < half_len {
1423                        src_a[i]
1424                    } else {
1425                        src_b[i - half_len]
1426                    }
1427                })
1428                .collect();
1429            let expected_hi: Vec<_> = (0..ops.len())
1430                .map(|i| {
1431                    if i < half_len {
1432                        src_a[half_len + i]
1433                    } else {
1434                        src_b[i]
1435                    }
1436                })
1437                .collect();
1438
1439            let a = ops.load(&src_a);
1440            let b = ops.load(&src_b);
1441            let ab_lo = ops.concat_low(a, b);
1442            let ab_hi = ops.concat_high(a, b);
1443
1444            assert_eq!(ab_lo.to_array().as_ref(), expected_low);
1445            assert_eq!(ab_hi.to_array().as_ref(), expected_hi);
1446        });
1447    }
1448
1449    #[test]
1450    fn test_reinterpret_cast() {
1451        test_simd_op!(isa, {
1452            let x = 1.456f32;
1453            let x_i32 = x.to_bits() as i32;
1454
1455            let x_vec = isa.f32().splat(x);
1456            let y_vec: I::I32 = x_vec.reinterpret_cast();
1457
1458            let expected = isa.i32().splat(x_i32);
1459            assert_simd_eq!(y_vec, expected);
1460        })
1461    }
1462
1463    #[test]
1464    fn test_to_float_i32() {
1465        test_simd_op!(isa, {
1466            let x = isa.i32().splat(42);
1467            let y = isa.i32().to_float(x);
1468
1469            let expected = isa.f32().splat(42.0);
1470            assert_simd_eq!(y, expected);
1471        });
1472    }
1473}