1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
use crate::SIMD_LEN;
use std::simd::cmp::SimdPartialEq;
use std::simd::Mask;
use std::simd::Simd;
use std::simd::SimdElement;
use std::slice;

/// Equality check that is meant to be carried out with `SIMD_LEN`-lane SIMD
/// vectors instead of element by element.
///
/// The bounds require the element type `T` to be usable as a SIMD lane and
/// `Simd<T, SIMD_LEN>` to support lane-wise (in)equality whose result is the
/// mask type that belongs to `T`.
pub trait EqSimd<'a, T>
where
    T: PartialEq + SimdElement,
    Simd<T, SIMD_LEN>: SimdPartialEq<Mask = Mask<<T as SimdElement>::Mask, SIMD_LEN>>,
{
    /// Compares `self` with `other` and reports whether they are equal.
    ///
    /// Both operands are only borrowed, so neither is consumed or advanced.
    fn eq_simd(&self, other: &Self) -> bool;
}

impl<'a, T> EqSimd<'a, T> for slice::Iter<'a, T>
where
    T: SimdElement + std::cmp::PartialEq,
    Simd<T, SIMD_LEN>: SimdPartialEq<Mask = Mask<T::Mask, SIMD_LEN>>,
{
    fn eq_simd(&self, other: &Self) -> bool {
        let a = self.as_slice();
        let b = other.as_slice();
        // This could save lots of time, but not sure if it's actually worth it
        if a.len() != b.len() {
            return false;
        }
        let (a_prefix, a_simd_chunk, a_suffix) = a.as_simd::<SIMD_LEN>();
        let (b_prefix, b_simd_chunk, b_suffix) = b.as_simd::<SIMD_LEN>();
        // Prefix
        if a_prefix.iter().ne(b_prefix.iter()) {
            return false;
        }
        // SIMD
        for (a_simd, b_simd) in a_simd_chunk.iter().zip(b_simd_chunk) {
            // Note that we use not equal
            if a_simd.simd_ne(*b_simd).to_bitmask() != 0 {
                return false;
            }
        }
        // Suffix
        a_suffix.iter().eq(b_suffix)
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use rand::distributions::Standard;
    use rand::prelude::Distribution;
    use rand::Rng;
    use std::fmt::Debug;

    fn test_for_type<T>()
    where
        T: rand::distributions::uniform::SampleUniform
            + PartialEq
            + Debug
            + Copy
            + Default
            + SimdElement
            + std::cmp::PartialEq,
        Simd<T, SIMD_LEN>: SimdPartialEq<Mask = Mask<T::Mask, SIMD_LEN>>,
        Standard: Distribution<T>,
    {
        for len in 0..1000 {
            for _ in 0..5 {
                let mut v: Vec<T> = vec![T::default(); len];
                let mut rng = rand::thread_rng();
                for x in v.iter_mut() {
                    *x = rng.gen()
                }
                let mut v2: Vec<T> = vec![T::default(); len];
                let mut rng = rand::thread_rng();
                for x in v2.iter_mut() {
                    *x = rng.gen()
                }

                let ans = v.iter().eq_simd(&v2.iter());
                let correct = v.iter().eq(&v2);

                assert_eq!(
                    ans,
                    correct,
                    "Failed for length {} and type {:?}",
                    len,
                    std::any::type_name::<T>()
                );
            }
        }
    }

    #[test]
    fn test_eq_simd() {
        test_for_type::<i8>();
        test_for_type::<i16>();
        test_for_type::<i32>();
        test_for_type::<i64>();
        test_for_type::<u8>();
        test_for_type::<u16>();
        test_for_type::<u32>();
        test_for_type::<u64>();
        test_for_type::<usize>();
        test_for_type::<isize>();
        test_for_type::<f32>();
        test_for_type::<f64>();
    }
}