generic_simd/arch/x86/
mod.rs

1//! x86/x86-64 vector types.
2
// The `complex` submodule is compiled, and its public items are re-exported
// from this module, only when the `complex` cargo feature is enabled.
#[cfg(feature = "complex")]
mod complex;
#[cfg(feature = "complex")]
pub use complex::*;
7
8use crate::{
9    arch::{generic, Token},
10    scalar::Scalar,
11    shim::{Shim2, Shim4, ShimToken},
12    vector::{width, Native, Vector},
13};
14
15#[cfg(target_arch = "x86")]
16use core::arch::x86::*;
17#[cfg(target_arch = "x86_64")]
18use core::arch::x86_64::*;
19
/// Token type standing for the SSE4.1 instruction set.
///
/// The only field is a private `()`, so code outside this module cannot
/// build an `Sse` with a struct literal. The value carries no data and is
/// freely copyable.
#[derive(Clone, Copy, Debug)]
pub struct Sse(());
23
/// Token type standing for the AVX instruction set.
///
/// The only field is a private `()`, so code outside this module cannot
/// build an `Avx` with a struct literal. The value carries no data and is
/// freely copyable.
#[derive(Clone, Copy, Debug)]
pub struct Avx(());
27
// Pair each token type with the name of the CPU feature it stands for.
// NOTE(review): `impl_token!` is defined elsewhere; presumably it implements
// `Token` for the type (including the `new_unchecked` constructor used in this
// file) keyed on the given feature string — confirm against the macro.
impl_token! { Sse => "sse4.1" }
impl_token! { Avx => "avx" }
30
31impl core::convert::From<Avx> for Sse {
32    #[inline]
33    fn from(_: Avx) -> Sse {
34        unsafe { Sse::new_unchecked() }
35    }
36}
37
// Width associated with each scalar type under the `Sse` token. The widths
// chosen here are the ones whose `Scalar<Sse, _>` impl in this file resolves
// directly to a 128-bit vector type (`f32x4` / `f64x2`) rather than a shim.

// `f32` under `Sse`: width `W4`.
impl Native<Sse> for f32 {
    type Width = width::W4;
}

// `f64` under `Sse`: width `W2`.
impl Native<Sse> for f64 {
    type Width = width::W2;
}
45
// Width associated with each scalar type under the `Avx` token. The widths
// chosen here are the ones whose `Scalar<Avx, _>` impl in this file resolves
// directly to a 256-bit vector type (`f32x8` / `f64x4`) rather than a shim.

// `f32` under `Avx`: width `W8`.
impl Native<Avx> for f32 {
    type Width = width::W8;
}

// `f64` under `Avx`: width `W4`.
impl Native<Avx> for f64 {
    type Width = width::W4;
}
53
/// An SSE vector of `f32`s.
///
/// A newtype over [`__m128`]; `#[repr(transparent)]` gives it the same
/// layout as the wrapped register type.
#[allow(non_camel_case_types)]
#[repr(transparent)]
#[derive(Debug, Copy, Clone)]
pub struct f32x4(__m128);
59
/// An SSE vector of `f64`s.
///
/// A newtype over [`__m128d`]; `#[repr(transparent)]` gives it the same
/// layout as the wrapped register type.
#[allow(non_camel_case_types)]
#[repr(transparent)]
#[derive(Debug, Copy, Clone)]
pub struct f64x2(__m128d);
65
/// An AVX vector of `f32`s.
///
/// A newtype over [`__m256`]; `#[repr(transparent)]` gives it the same
/// layout as the wrapped register type.
#[allow(non_camel_case_types)]
#[repr(transparent)]
#[derive(Debug, Copy, Clone)]
pub struct f32x8(__m256);
71
/// An AVX vector of `f64`s.
///
/// A newtype over [`__m256d`]; `#[repr(transparent)]` gives it the same
/// layout as the wrapped register type.
#[allow(non_camel_case_types)]
#[repr(transparent)]
#[derive(Debug, Copy, Clone)]
pub struct f64x4(__m256d);
77
// Vector type selected for `f32` under the `Sse` token, one impl per width.
// NOTE(review): `ShimToken` and `Shim2` come from `crate::shim`; presumably
// `ShimToken` attaches a token type to a vector and `Shim2` doubles the width
// of its inner vector type — confirm there.

// W1: the generic `f32x1`, wrapped in `ShimToken` with `Sse` as the token.
impl Scalar<Sse, width::W1> for f32 {
    type Vector = ShimToken<generic::f32x1, Self, Sse>;
}

// W2: `Shim2` over the generic `f32x1`, wrapped in `ShimToken` with `Sse`.
impl Scalar<Sse, width::W2> for f32 {
    type Vector = ShimToken<Shim2<generic::f32x1, Self>, Self, Sse>;
}

// W4: the `f32x4` type defined in this module, used directly.
impl Scalar<Sse, width::W4> for f32 {
    type Vector = f32x4;
}

// W8: `Shim2` over `f32x4`.
impl Scalar<Sse, width::W8> for f32 {
    type Vector = Shim2<f32x4, f32>;
}
93
// Vector type selected for `f64` under the `Sse` token, one impl per width.
// NOTE(review): `ShimToken`, `Shim2` and `Shim4` come from `crate::shim`;
// presumably `ShimToken` attaches a token type to a vector while
// `Shim2`/`Shim4` multiply the width of their inner vector type by 2/4 —
// confirm there.

// W1: the generic `f64x1`, wrapped in `ShimToken` with `Sse` as the token.
impl Scalar<Sse, width::W1> for f64 {
    type Vector = ShimToken<generic::f64x1, Self, Sse>;
}

// W2: the `f64x2` type defined in this module, used directly.
impl Scalar<Sse, width::W2> for f64 {
    type Vector = f64x2;
}

// W4: `Shim2` over `f64x2`.
impl Scalar<Sse, width::W4> for f64 {
    type Vector = Shim2<f64x2, f64>;
}

// W8: `Shim4` over `f64x2`.
impl Scalar<Sse, width::W8> for f64 {
    type Vector = Shim4<f64x2, f64>;
}
109
// Vector type selected for `f32` under the `Avx` token, one impl per width.
// Widths below W8 reuse narrower vector types wrapped in `ShimToken` with
// `Avx` as the token parameter.
// NOTE(review): `ShimToken` and `Shim2` come from `crate::shim`; presumably
// `ShimToken` re-tags a vector with a different token type and `Shim2` doubles
// the width of its inner vector type — confirm there.

// W1: the generic `f32x1`, wrapped in `ShimToken` with `Avx` as the token.
impl Scalar<Avx, width::W1> for f32 {
    type Vector = ShimToken<generic::f32x1, Self, Avx>;
}

// W2: `Shim2` over the generic `f32x1`, wrapped in `ShimToken` with `Avx`.
impl Scalar<Avx, width::W2> for f32 {
    type Vector = ShimToken<Shim2<generic::f32x1, Self>, Self, Avx>;
}

// W4: the SSE `f32x4`, wrapped in `ShimToken` with `Avx`.
impl Scalar<Avx, width::W4> for f32 {
    type Vector = ShimToken<f32x4, Self, Avx>;
}

// W8: the `f32x8` type defined in this module, used directly.
impl Scalar<Avx, width::W8> for f32 {
    type Vector = f32x8;
}
125
// Vector type selected for `f64` under the `Avx` token, one impl per width.
// Widths below W4 reuse narrower vector types wrapped in `ShimToken` with
// `Avx` as the token parameter.
// NOTE(review): `ShimToken` and `Shim2` come from `crate::shim`; presumably
// `ShimToken` re-tags a vector with a different token type and `Shim2` doubles
// the width of its inner vector type — confirm there.

// W1: the generic `f64x1`, wrapped in `ShimToken` with `Avx` as the token.
impl Scalar<Avx, width::W1> for f64 {
    type Vector = ShimToken<generic::f64x1, Self, Avx>;
}

// W2: the SSE `f64x2`, wrapped in `ShimToken` with `Avx`.
impl Scalar<Avx, width::W2> for f64 {
    type Vector = ShimToken<f64x2, Self, Avx>;
}

// W4: the `f64x4` type defined in this module, used directly.
impl Scalar<Avx, width::W4> for f64 {
    type Vector = f64x4;
}

// W8: `Shim2` over `f64x4`.
impl Scalar<Avx, width::W8> for f64 {
    type Vector = Shim2<f64x4, f64>;
}
141
// Maps `add`/`sub`/`mul`/`div` for `f32x4` onto the 128-bit packed-single
// intrinsics, passing `Sse::new_unchecked()` as the `feature` expression.
// NOTE(review): `arithmetic_ops!` is defined elsewhere; presumably it emits
// the `core::ops` arithmetic trait impls for the type — confirm against the
// macro, including how it uses `feature`.
arithmetic_ops! {
    feature: Sse::new_unchecked(),
    for f32x4:
        add -> (_mm_add_ps),
        sub -> (_mm_sub_ps),
        mul -> (_mm_mul_ps),
        div -> (_mm_div_ps)
}
150
// Maps `add`/`sub`/`mul`/`div` for `f64x2` onto the 128-bit packed-double
// intrinsics, passing `Sse::new_unchecked()` as the `feature` expression.
// NOTE(review): `arithmetic_ops!` is defined elsewhere; presumably it emits
// the `core::ops` arithmetic trait impls for the type — confirm against the
// macro, including how it uses `feature`.
arithmetic_ops! {
    feature: Sse::new_unchecked(),
    for f64x2:
        add -> (_mm_add_pd),
        sub -> (_mm_sub_pd),
        mul -> (_mm_mul_pd),
        div -> (_mm_div_pd)
}
159
// Maps `add`/`sub`/`mul`/`div` for `f32x8` onto the 256-bit packed-single
// intrinsics, passing `Avx::new_unchecked()` as the `feature` expression.
// NOTE(review): `arithmetic_ops!` is defined elsewhere; presumably it emits
// the `core::ops` arithmetic trait impls for the type — confirm against the
// macro, including how it uses `feature`.
arithmetic_ops! {
    feature: Avx::new_unchecked(),
    for f32x8:
        add -> (_mm256_add_ps),
        sub -> (_mm256_sub_ps),
        mul -> (_mm256_mul_ps),
        div -> (_mm256_div_ps)
}
168
// Maps `add`/`sub`/`mul`/`div` for `f64x4` onto the 256-bit packed-double
// intrinsics, passing `Avx::new_unchecked()` as the `feature` expression.
// NOTE(review): `arithmetic_ops!` is defined elsewhere; presumably it emits
// the `core::ops` arithmetic trait impls for the type — confirm against the
// macro, including how it uses `feature`.
arithmetic_ops! {
    feature: Avx::new_unchecked(),
    for f64x4:
        add -> (_mm256_add_pd),
        sub -> (_mm256_sub_pd),
        mul -> (_mm256_mul_pd),
        div -> (_mm256_div_pd)
}
177
178impl core::ops::Neg for f32x4 {
179    type Output = Self;
180
181    #[inline]
182    fn neg(self) -> Self {
183        Self(unsafe { _mm_xor_ps(self.0, _mm_set1_ps(-0.)) })
184    }
185}
186
187impl core::ops::Neg for f64x2 {
188    type Output = Self;
189
190    #[inline]
191    fn neg(self) -> Self {
192        Self(unsafe { _mm_xor_pd(self.0, _mm_set1_pd(-0.)) })
193    }
194}
195
196impl core::ops::Neg for f32x8 {
197    type Output = Self;
198
199    #[inline]
200    fn neg(self) -> Self {
201        Self(unsafe { _mm256_xor_ps(self.0, _mm256_set1_ps(-0.)) })
202    }
203}
204
205impl core::ops::Neg for f64x4 {
206    type Output = Self;
207
208    #[inline]
209    fn neg(self) -> Self {
210        Self(unsafe { _mm256_xor_pd(self.0, _mm256_set1_pd(-0.)) })
211    }
212}
213
// Invoke `as_slice!` once for each vector type defined in this module.
// NOTE(review): `as_slice!` is defined elsewhere; presumably it provides
// slice-style access to each vector's lanes — confirm against the macro.
as_slice! { f32x4 }
as_slice! { f32x8 }
as_slice! { f64x2 }
as_slice! { f64x4 }
218
219unsafe impl Vector for f32x4 {
220    type Scalar = f32;
221
222    type Token = Sse;
223
224    type Width = crate::vector::width::W4;
225
226    type Underlying = __m128;
227
228    #[inline]
229    fn zeroed(_: Self::Token) -> Self {
230        Self(unsafe { _mm_setzero_ps() })
231    }
232
233    #[inline]
234    fn splat(_: Self::Token, from: Self::Scalar) -> Self {
235        Self(unsafe { _mm_set1_ps(from) })
236    }
237}
238
239unsafe impl Vector for f64x2 {
240    type Scalar = f64;
241
242    type Token = Sse;
243
244    type Width = crate::vector::width::W2;
245
246    type Underlying = __m128d;
247
248    #[inline]
249    fn zeroed(_: Self::Token) -> Self {
250        Self(unsafe { _mm_setzero_pd() })
251    }
252
253    #[inline]
254    fn splat(_: Self::Token, from: Self::Scalar) -> Self {
255        Self(unsafe { _mm_set1_pd(from) })
256    }
257}
258
259unsafe impl Vector for f32x8 {
260    type Scalar = f32;
261
262    type Token = Avx;
263
264    type Width = crate::vector::width::W8;
265
266    type Underlying = __m256;
267
268    #[inline]
269    fn zeroed(_: Self::Token) -> Self {
270        Self(unsafe { _mm256_setzero_ps() })
271    }
272
273    #[inline]
274    fn splat(_: Self::Token, from: Self::Scalar) -> Self {
275        Self(unsafe { _mm256_set1_ps(from) })
276    }
277}
278
279unsafe impl Vector for f64x4 {
280    type Scalar = f64;
281
282    type Token = Avx;
283
284    type Width = crate::vector::width::W4;
285
286    type Underlying = __m256d;
287
288    #[inline]
289    fn zeroed(_: Self::Token) -> Self {
290        Self(unsafe { _mm256_setzero_pd() })
291    }
292
293    #[inline]
294    fn splat(_: Self::Token, from: Self::Scalar) -> Self {
295        Self(unsafe { _mm256_set1_pd(from) })
296    }
297}