ggmath/vector/primitive_impls/u32/b128_sse2.rs

use_core_arch_x86! {
    __m128i,
    _mm_add_epi32,
    _mm_and_si128,
    _mm_or_si128,
    _mm_set1_epi32,
    _mm_set_epi32,
    _mm_sub_epi32,
    _mm_xor_si128,
}

use core::arch::asm;

use crate::{
    SimdBehaviour, Vec2, Vec3, Vec4, Vector,
    vector::{SoundVectorRepr, primitive_impls::use_core_arch_x86, vec2, vec3, vec4},
};

// SAFETY: __m128i can contain exactly 4 u32s
unsafe impl SoundVectorRepr<4, u32> for __m128i {}

// SAFETY: __m128i can contain exactly 4 u32s, so it does begin with 3 u32s
unsafe impl SoundVectorRepr<3, u32> for __m128i {}

////////////////////////////////////////////////////////////////////////////////
// Vector4
////////////////////////////////////////////////////////////////////////////////

impl SimdBehaviour<4> for u32 {
    type VectorRepr = __m128i;

    #[inline(always)]
    fn vec_from_array(array: [Self; 4]) -> Vec4<Self> {
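        // _mm_set_epi32 takes its arguments from the highest lane (w) down to
        // the lowest (x), hence the reversed order below.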
        Vector::from_repr(unsafe {
            _mm_set_epi32(
                array[3].cast_signed(),
                array[2].cast_signed(),
                array[1].cast_signed(),
                array[0].cast_signed(),
            )
        })
    }

    #[inline(always)]
    fn vec_splat(value: Self) -> Vec4<Self> {
        Vector::from_repr(unsafe { _mm_set1_epi32(value.cast_signed()) })
    }

    #[inline(always)]
    unsafe fn vec_swizzle2<const X_SRC: usize, const Y_SRC: usize>(vec: Vec4<Self>) -> Vec2<Self> {
        vec2!(vec[X_SRC], vec[Y_SRC])
    }

    #[inline(always)]
    unsafe fn vec_swizzle3<const X_SRC: usize, const Y_SRC: usize, const Z_SRC: usize>(
        vec: Vec4<Self>,
    ) -> Vec3<Self> {
        let result_as_vec4 = vec.swizzle4::<X_SRC, Y_SRC, Z_SRC, Z_SRC>();

        Vector::from_repr(result_as_vec4.repr())
    }

    #[inline(always)]
    unsafe fn vec_swizzle4<
        const X_SRC: usize,
        const Y_SRC: usize,
        const Z_SRC: usize,
        const W_SRC: usize,
    >(
        vec: Vec4<Self>,
    ) -> Vec4<Self> {
        let result: __m128i;
        // SAFETY: pshufd is part of sse2, so it is safe to use here.
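        // pshufd's 8-bit immediate packs one 2-bit source-lane index per
        // destination lane: bits 1:0 select x, 3:2 select y, 5:4 select z,
        // and 7:6 select w.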
        unsafe {
            asm!("pshufd {0}, {0}, {1}", inout(xmm_reg) vec.repr() => result, const {
                let x_src_bits = (X_SRC as u32) << 0;
                let y_src_bits = (Y_SRC as u32) << 2;
                let z_src_bits = (Z_SRC as u32) << 4;
                let w_src_bits = (W_SRC as u32) << 6;

                (x_src_bits | y_src_bits | z_src_bits | w_src_bits).cast_signed()
            });
        }

        Vector::from_repr(result)
    }

    // TODO: optimize eq and ne once masks are implemented

    #[inline(always)]
    fn vec_not(vec: Vec4<Self>) -> Vec4<Self> {
        // XOR with an all-ones mask (u32::MAX) flips every bit.
        Vector::from_repr(unsafe { _mm_xor_si128(vec.repr(), vec4!(u32::MAX).repr()) })
    }

    #[inline(always)]
    fn vec_add(vec: Vec4<Self>, rhs: Vec4<Self>) -> Vec4<Self> {
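        // Scalar adds in debug builds preserve Rust's overflow panics; the
        // SIMD path in release wraps, matching unchecked release integer
        // semantics.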
        if cfg!(debug_assertions) {
            vec4!(vec.x + rhs.x, vec.y + rhs.y, vec.z + rhs.z, vec.w + rhs.w)
        } else {
            Vector::from_repr(unsafe { _mm_add_epi32(vec.repr(), rhs.repr()) })
        }
    }

    #[inline(always)]
    fn vec_sub(vec: Vec4<Self>, rhs: Vec4<Self>) -> Vec4<Self> {
        if cfg!(debug_assertions) {
            vec4!(vec.x - rhs.x, vec.y - rhs.y, vec.z - rhs.z, vec.w - rhs.w)
        } else {
            Vector::from_repr(unsafe { _mm_sub_epi32(vec.repr(), rhs.repr()) })
        }
    }

    #[inline(always)]
    fn vec_mul(vec: Vec4<Self>, rhs: Vec4<Self>) -> Vec4<Self> {
        // TODO: determine if this can be optimized
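        // SSE2 lacks a lane-wise 32-bit multiply: _mm_mullo_epi32 only arrives
        // with SSE4.1, and SSE2's _mm_mul_epu32 multiplies just the even lanes
        // into 64-bit products.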

        vec4!(vec.x * rhs.x, vec.y * rhs.y, vec.z * rhs.z, vec.w * rhs.w)
    }

    #[inline(always)]
    fn vec_div(vec: Vec4<Self>, rhs: Vec4<Self>) -> Vec4<Self> {
        vec4!(vec.x / rhs.x, vec.y / rhs.y, vec.z / rhs.z, vec.w / rhs.w)
    }

    #[inline(always)]
    fn vec_rem(vec: Vec4<Self>, rhs: Vec4<Self>) -> Vec4<Self> {
        vec4!(vec.x % rhs.x, vec.y % rhs.y, vec.z % rhs.z, vec.w % rhs.w)
    }

    #[inline(always)]
    fn vec_shl(vec: Vec4<Self>, rhs: Vec4<Self>) -> Vec4<Self> {
        // SSE2 has no per-lane variable shifts: _mm_sll_epi32 shifts every
        // lane by the single count in rhs's low 64 bits, so shift lane by
        // lane here.
        vec4!(vec.x << rhs.x, vec.y << rhs.y, vec.z << rhs.z, vec.w << rhs.w)
    }

    #[inline(always)]
    fn vec_shr(vec: Vec4<Self>, rhs: Vec4<Self>) -> Vec4<Self> {
        // u32 needs a logical shift (_mm_sra_epi32 would be arithmetic), and
        // SSE2 also lacks per-lane variable shifts, so shift lane by lane here.
        vec4!(vec.x >> rhs.x, vec.y >> rhs.y, vec.z >> rhs.z, vec.w >> rhs.w)
    }

    #[inline(always)]
    fn vec_bitand(vec: Vec4<Self>, rhs: Vec4<Self>) -> Vec4<Self> {
        Vector::from_repr(unsafe { _mm_and_si128(vec.repr(), rhs.repr()) })
    }

    #[inline(always)]
    fn vec_bitor(vec: Vec4<Self>, rhs: Vec4<Self>) -> Vec4<Self> {
        Vector::from_repr(unsafe { _mm_or_si128(vec.repr(), rhs.repr()) })
    }

    #[inline(always)]
    fn vec_bitxor(vec: Vec4<Self>, rhs: Vec4<Self>) -> Vec4<Self> {
        Vector::from_repr(unsafe { _mm_xor_si128(vec.repr(), rhs.repr()) })
    }
}

////////////////////////////////////////////////////////////////////////////////
// Vector3
////////////////////////////////////////////////////////////////////////////////

impl SimdBehaviour<3> for u32 {
    type VectorRepr = __m128i;

    #[inline(always)]
    fn vec_from_array(array: [Self; 3]) -> Vec3<Self> {
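        // Lanes are given highest-first; the unused 4th lane is padded with a
        // copy of array[2].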
        Vector::from_repr(unsafe {
            _mm_set_epi32(
                array[2].cast_signed(),
                array[2].cast_signed(),
                array[1].cast_signed(),
                array[0].cast_signed(),
            )
        })
    }

    #[inline(always)]
    fn vec_splat(value: Self) -> Vec3<Self> {
        Vector::from_repr(unsafe { _mm_set1_epi32(value.cast_signed()) })
    }

    #[inline(always)]
    unsafe fn vec_swizzle2<const X_SRC: usize, const Y_SRC: usize>(vec: Vec3<Self>) -> Vec2<Self> {
        vec2!(vec[X_SRC], vec[Y_SRC])
    }

    #[inline(always)]
    unsafe fn vec_swizzle3<const X_SRC: usize, const Y_SRC: usize, const Z_SRC: usize>(
        vec: Vec3<Self>,
    ) -> Vec3<Self> {
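        // Reinterpreting the repr is sound: Vec3 and Vec4 share the same
        // __m128i layout (see the SoundVectorRepr impls above).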
        let vec_as_vec4 = Vec4::<Self>::from_repr(vec.repr());
        let result_as_vec4 = vec_as_vec4.swizzle4::<X_SRC, Y_SRC, Z_SRC, Z_SRC>();

        Vector::from_repr(result_as_vec4.repr())
    }

    #[inline(always)]
    unsafe fn vec_swizzle4<
        const X_SRC: usize,
        const Y_SRC: usize,
        const Z_SRC: usize,
        const W_SRC: usize,
    >(
        vec: Vec3<Self>,
    ) -> Vec4<Self> {
        let vec_as_vec4 = Vec4::<Self>::from_repr(vec.repr());

        vec_as_vec4.swizzle4::<X_SRC, Y_SRC, Z_SRC, W_SRC>()
    }

    // TODO: optimize eq and ne once masks are implemented

    #[inline(always)]
    fn vec_not(vec: Vec3<Self>) -> Vec3<Self> {
        // XOR with an all-ones mask (u32::MAX) flips every bit.
        Vector::from_repr(unsafe { _mm_xor_si128(vec.repr(), vec3!(u32::MAX).repr()) })
    }

    #[inline(always)]
    fn vec_add(vec: Vec3<Self>, rhs: Vec3<Self>) -> Vec3<Self> {
        if cfg!(debug_assertions) {
            vec3!(vec.x + rhs.x, vec.y + rhs.y, vec.z + rhs.z)
        } else {
            Vector::from_repr(unsafe { _mm_add_epi32(vec.repr(), rhs.repr()) })
        }
    }

    #[inline(always)]
    fn vec_sub(vec: Vec3<Self>, rhs: Vec3<Self>) -> Vec3<Self> {
        if cfg!(debug_assertions) {
            vec3!(vec.x - rhs.x, vec.y - rhs.y, vec.z - rhs.z)
        } else {
            Vector::from_repr(unsafe { _mm_sub_epi32(vec.repr(), rhs.repr()) })
        }
    }

    #[inline(always)]
    fn vec_mul(vec: Vec3<Self>, rhs: Vec3<Self>) -> Vec3<Self> {
        // TODO: determine if this can be optimized
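        // Same limitation as the Vector4 impl: SSE2 has no lane-wise 32-bit
        // multiply.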

        vec3!(vec.x * rhs.x, vec.y * rhs.y, vec.z * rhs.z)
    }

    #[inline(always)]
    fn vec_div(vec: Vec3<Self>, rhs: Vec3<Self>) -> Vec3<Self> {
        vec3!(vec.x / rhs.x, vec.y / rhs.y, vec.z / rhs.z)
    }

    #[inline(always)]
    fn vec_rem(vec: Vec3<Self>, rhs: Vec3<Self>) -> Vec3<Self> {
        vec3!(vec.x % rhs.x, vec.y % rhs.y, vec.z % rhs.z)
    }

    #[inline(always)]
    fn vec_shl(vec: Vec3<Self>, rhs: Vec3<Self>) -> Vec3<Self> {
        // SSE2 has no per-lane variable shifts; see the Vector4 impl.
        vec3!(vec.x << rhs.x, vec.y << rhs.y, vec.z << rhs.z)
    }

    #[inline(always)]
    fn vec_shr(vec: Vec3<Self>, rhs: Vec3<Self>) -> Vec3<Self> {
        // Logical, lane-by-lane shift for the same reasons as the Vector4 impl.
        vec3!(vec.x >> rhs.x, vec.y >> rhs.y, vec.z >> rhs.z)
    }

    #[inline(always)]
    fn vec_bitand(vec: Vec3<Self>, rhs: Vec3<Self>) -> Vec3<Self> {
        Vector::from_repr(unsafe { _mm_and_si128(vec.repr(), rhs.repr()) })
    }

    #[inline(always)]
    fn vec_bitor(vec: Vec3<Self>, rhs: Vec3<Self>) -> Vec3<Self> {
        Vector::from_repr(unsafe { _mm_or_si128(vec.repr(), rhs.repr()) })
    }

    #[inline(always)]
    fn vec_bitxor(vec: Vec3<Self>, rhs: Vec3<Self>) -> Vec3<Self> {
        Vector::from_repr(unsafe { _mm_xor_si128(vec.repr(), rhs.repr()) })
    }
}
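
// Illustrative sanity checks, a minimal sketch rather than a full test suite.
// These assume the crate wires the trait methods above into the usual operator
// impls (`+`, `!`) and that `swizzle4` is callable as it is used in this file;
// they also require an SSE2-capable x86 target.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn add_and_swizzle() {
        let a = vec4!(1u32, 2, 3, 4);
        let b = vec4!(10u32, 20, 30, 40);

        let sum = a + b;
        assert_eq!([sum.x, sum.y, sum.z, sum.w], [11, 22, 33, 44]);

        // Reversing the lanes twice restores the original order.
        let rev = a.swizzle4::<3, 2, 1, 0>();
        let back = rev.swizzle4::<3, 2, 1, 0>();
        assert_eq!([rev.x, rev.y, rev.z, rev.w], [4, 3, 2, 1]);
        assert_eq!([back.x, back.y, back.z, back.w], [1, 2, 3, 4]);
    }

    #[test]
    fn not_flips_all_bits() {
        let v = vec4!(0u32, u32::MAX, 0xDEAD_BEEF, 1);
        let n = !v;
        assert_eq!(
            [n.x, n.y, n.z, n.w],
            [u32::MAX, 0, !0xDEAD_BEEF_u32, u32::MAX - 1]
        );
    }
}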