// ggmath/vector/primitive_impls/i32/b128_sse2.rs

1use_core_arch_x86! {
2    __m128i,
3    _mm_add_epi32,
4    _mm_and_si128,
5    _mm_or_si128,
6    _mm_set1_epi32,
7    _mm_set_epi32,
8    _mm_sll_epi32,
9    _mm_sub_epi32,
10    _mm_sra_epi32,
11    _mm_xor_si128,
12}
13
14use core::arch::asm;
15
16use crate::{
17    SimdBehaviour, Vec2, Vec3, Vec4, Vector,
18    vector::{SoundVectorRepr, primitive_impls::use_core_arch_x86, vec2, vec3, vec4},
19};
20
// SAFETY: `__m128i` is 16 bytes (= 4 x 4-byte lanes) with alignment at least
// that of `i32`, so it can soundly store exactly 4 `i32`s.
unsafe impl SoundVectorRepr<4, i32> for __m128i {}

// SAFETY: `__m128i` can contain exactly 4 `i32`s, so it does begin with 3
// `i32`s; the 4th lane is unused padding for the 3-component case.
unsafe impl SoundVectorRepr<3, i32> for __m128i {}
26
27////////////////////////////////////////////////////////////////////////////////
28// Vector4
29////////////////////////////////////////////////////////////////////////////////
30
31impl SimdBehaviour<4> for i32 {
32    type VectorRepr = __m128i;
33
34    #[inline(always)]
35    fn vec_from_array(array: [Self; 4]) -> Vec4<Self> {
36        Vector::from_repr(unsafe { _mm_set_epi32(array[3], array[2], array[1], array[0]) })
37    }
38
39    #[inline(always)]
40    fn vec_splat(value: Self) -> Vec4<Self> {
41        Vector::from_repr(unsafe { _mm_set1_epi32(value) })
42    }
43
44    #[inline(always)]
45    unsafe fn vec_swizzle2<const X_SRC: usize, const Y_SRC: usize>(vec: Vec4<Self>) -> Vec2<Self> {
46        vec2!(vec[X_SRC], vec[Y_SRC])
47    }
48
49    #[inline(always)]
50    unsafe fn vec_swizzle3<const X_SRC: usize, const Y_SRC: usize, const Z_SRC: usize>(
51        vec: Vec4<Self>,
52    ) -> Vec3<Self> {
53        let result_as_vec4 = vec.swizzle4::<X_SRC, Y_SRC, Z_SRC, Z_SRC>();
54
55        Vector::from_repr(result_as_vec4.repr())
56    }
57
58    #[inline(always)]
59    unsafe fn vec_swizzle4<
60        const X_SRC: usize,
61        const Y_SRC: usize,
62        const Z_SRC: usize,
63        const W_SRC: usize,
64    >(
65        vec: Vec4<Self>,
66    ) -> Vec4<Self> {
67        let result: __m128i;
68        // SAFETY: pshufd is part of sse2, so it is safe to use here.
69        unsafe {
70            asm!("pshufd {0}, {0}, {1}", inout(xmm_reg) vec.repr() => result, const {
71                let x_src_bits = (X_SRC as u32) << 0;
72                let y_src_bits = (Y_SRC as u32) << 2;
73                let z_src_bits = (Z_SRC as u32) << 4;
74                let w_src_bits = (W_SRC as u32) << 6;
75
76                (x_src_bits | y_src_bits | z_src_bits | w_src_bits).cast_signed()
77            });
78        }
79
80        Vector::from_repr(result)
81    }
82
83    // TODO: optimize eq and ne once masks are implemented
84
85    #[inline(always)]
86    fn vec_neg(vec: Vec4<Self>) -> Vec4<Self> {
87        Vector::from_repr(unsafe { _mm_sub_epi32(vec4!(0).repr(), vec.repr()) })
88    }
89
90    #[inline(always)]
91    fn vec_not(vec: Vec4<Self>) -> Vec4<Self> {
92        Vector::from_repr(unsafe { _mm_xor_si128(vec.repr(), vec4!(-1).repr()) })
93    }
94
95    #[inline(always)]
96    fn vec_add(vec: Vec4<Self>, rhs: Vec4<Self>) -> Vec4<Self> {
97        if cfg!(debug_assertions) {
98            vec4!(vec.x + rhs.x, vec.y + rhs.y, vec.z + rhs.z, vec.w + rhs.w)
99        } else {
100            Vector::from_repr(unsafe { _mm_add_epi32(vec.repr(), rhs.repr()) })
101        }
102    }
103
104    #[inline(always)]
105    fn vec_sub(vec: Vec4<Self>, rhs: Vec4<Self>) -> Vec4<Self> {
106        if cfg!(debug_assertions) {
107            vec4!(vec.x - rhs.x, vec.y - rhs.y, vec.z - rhs.z, vec.w - rhs.w)
108        } else {
109            Vector::from_repr(unsafe { _mm_sub_epi32(vec.repr(), rhs.repr()) })
110        }
111    }
112
113    #[inline(always)]
114    fn vec_mul(vec: Vec4<Self>, rhs: Vec4<Self>) -> Vec4<Self> {
115        // TODO: determine if this can be optimized
116
117        vec4!(vec.x * rhs.x, vec.y * rhs.y, vec.z * rhs.z, vec.w * rhs.w)
118    }
119
120    #[inline(always)]
121    fn vec_div(vec: Vec4<Self>, rhs: Vec4<Self>) -> Vec4<Self> {
122        vec4!(vec.x / rhs.x, vec.y / rhs.y, vec.z / rhs.z, vec.w / rhs.w)
123    }
124
125    #[inline(always)]
126    fn vec_rem(vec: Vec4<Self>, rhs: Vec4<Self>) -> Vec4<Self> {
127        vec4!(vec.x % rhs.x, vec.y % rhs.y, vec.z % rhs.z, vec.w % rhs.w)
128    }
129
130    #[inline(always)]
131    fn vec_shl(vec: Vec4<Self>, rhs: Vec4<Self>) -> Vec4<Self> {
132        Vector::from_repr(unsafe { _mm_sll_epi32(vec.repr(), rhs.repr()) })
133    }
134
135    #[inline(always)]
136    fn vec_shr(vec: Vec4<Self>, rhs: Vec4<Self>) -> Vec4<Self> {
137        Vector::from_repr(unsafe { _mm_sra_epi32(vec.repr(), rhs.repr()) })
138    }
139
140    #[inline(always)]
141    fn vec_bitand(vec: Vec4<Self>, rhs: Vec4<Self>) -> Vec4<Self> {
142        Vector::from_repr(unsafe { _mm_and_si128(vec.repr(), rhs.repr()) })
143    }
144
145    #[inline(always)]
146    fn vec_bitor(vec: Vec4<Self>, rhs: Vec4<Self>) -> Vec4<Self> {
147        Vector::from_repr(unsafe { _mm_or_si128(vec.repr(), rhs.repr()) })
148    }
149
150    #[inline(always)]
151    fn vec_bitxor(vec: Vec4<Self>, rhs: Vec4<Self>) -> Vec4<Self> {
152        Vector::from_repr(unsafe { _mm_xor_si128(vec.repr(), rhs.repr()) })
153    }
154}
155
156////////////////////////////////////////////////////////////////////////////////
157// Vector3
158////////////////////////////////////////////////////////////////////////////////
159
160impl SimdBehaviour<3> for i32 {
161    type VectorRepr = __m128i;
162
163    #[inline(always)]
164    fn vec_from_array(array: [Self; 3]) -> Vec3<Self> {
165        Vector::from_repr(unsafe { _mm_set_epi32(array[2], array[2], array[1], array[0]) })
166    }
167
168    #[inline(always)]
169    fn vec_splat(value: Self) -> Vec3<Self> {
170        Vector::from_repr(unsafe { _mm_set1_epi32(value) })
171    }
172
173    #[inline(always)]
174    unsafe fn vec_swizzle2<const X_SRC: usize, const Y_SRC: usize>(vec: Vec3<Self>) -> Vec2<Self> {
175        vec2!(vec[X_SRC], vec[Y_SRC])
176    }
177
178    #[inline(always)]
179    unsafe fn vec_swizzle3<const X_SRC: usize, const Y_SRC: usize, const Z_SRC: usize>(
180        vec: Vec3<Self>,
181    ) -> Vec3<Self> {
182        let vec_as_vec4 = Vec4::<Self>::from_repr(vec.repr());
183        let result_as_vec4 = vec_as_vec4.swizzle4::<X_SRC, Y_SRC, Z_SRC, Z_SRC>();
184
185        Vector::from_repr(result_as_vec4.repr())
186    }
187
188    #[inline(always)]
189    unsafe fn vec_swizzle4<
190        const X_SRC: usize,
191        const Y_SRC: usize,
192        const Z_SRC: usize,
193        const W_SRC: usize,
194    >(
195        vec: Vec3<Self>,
196    ) -> Vec4<Self> {
197        let vec_as_vec4 = Vec4::<Self>::from_repr(vec.repr());
198
199        vec_as_vec4.swizzle4::<X_SRC, Y_SRC, Z_SRC, W_SRC>()
200    }
201
202    // TODO: optimize eq and ne once masks are implemented
203
204    #[inline(always)]
205    fn vec_neg(vec: Vec3<Self>) -> Vec3<Self> {
206        Vector::from_repr(unsafe { _mm_sub_epi32(vec3!(0).repr(), vec.repr()) })
207    }
208
209    #[inline(always)]
210    fn vec_not(vec: Vec3<Self>) -> Vec3<Self> {
211        Vector::from_repr(unsafe { _mm_xor_si128(vec.repr(), vec3!(-1).repr()) })
212    }
213
214    #[inline(always)]
215    fn vec_add(vec: Vec3<Self>, rhs: Vec3<Self>) -> Vec3<Self> {
216        if cfg!(debug_assertions) {
217            vec3!(vec.x + rhs.x, vec.y + rhs.y, vec.z + rhs.z)
218        } else {
219            Vector::from_repr(unsafe { _mm_add_epi32(vec.repr(), rhs.repr()) })
220        }
221    }
222
223    #[inline(always)]
224    fn vec_sub(vec: Vec3<Self>, rhs: Vec3<Self>) -> Vec3<Self> {
225        if cfg!(debug_assertions) {
226            vec3!(vec.x - rhs.x, vec.y - rhs.y, vec.z - rhs.z)
227        } else {
228            Vector::from_repr(unsafe { _mm_sub_epi32(vec.repr(), rhs.repr()) })
229        }
230    }
231
232    #[inline(always)]
233    fn vec_mul(vec: Vec3<Self>, rhs: Vec3<Self>) -> Vec3<Self> {
234        // TODO: determine if this can be optimized
235
236        vec3!(vec.x * rhs.x, vec.y * rhs.y, vec.z * rhs.z)
237    }
238
239    #[inline(always)]
240    fn vec_div(vec: Vec3<Self>, rhs: Vec3<Self>) -> Vec3<Self> {
241        vec3!(vec.x / rhs.x, vec.y / rhs.y, vec.z / rhs.z)
242    }
243
244    #[inline(always)]
245    fn vec_rem(vec: Vec3<Self>, rhs: Vec3<Self>) -> Vec3<Self> {
246        vec3!(vec.x % rhs.x, vec.y % rhs.y, vec.z % rhs.z)
247    }
248
249    #[inline(always)]
250    fn vec_shl(vec: Vec3<Self>, rhs: Vec3<Self>) -> Vec3<Self> {
251        Vector::from_repr(unsafe { _mm_sll_epi32(vec.repr(), rhs.repr()) })
252    }
253
254    #[inline(always)]
255    fn vec_shr(vec: Vec3<Self>, rhs: Vec3<Self>) -> Vec3<Self> {
256        Vector::from_repr(unsafe { _mm_sra_epi32(vec.repr(), rhs.repr()) })
257    }
258
259    #[inline(always)]
260    fn vec_bitand(vec: Vec3<Self>, rhs: Vec3<Self>) -> Vec3<Self> {
261        Vector::from_repr(unsafe { _mm_and_si128(vec.repr(), rhs.repr()) })
262    }
263
264    #[inline(always)]
265    fn vec_bitor(vec: Vec3<Self>, rhs: Vec3<Self>) -> Vec3<Self> {
266        Vector::from_repr(unsafe { _mm_or_si128(vec.repr(), rhs.repr()) })
267    }
268
269    #[inline(always)]
270    fn vec_bitxor(vec: Vec3<Self>, rhs: Vec3<Self>) -> Vec3<Self> {
271        Vector::from_repr(unsafe { _mm_xor_si128(vec.repr(), rhs.repr()) })
272    }
273}