generic_simd/arch/x86/mod.rs
#[cfg(feature = "complex")]
mod complex;
#[cfg(feature = "complex")]
pub use complex::*;

use crate::{
    arch::{generic, Token},
    scalar::Scalar,
    shim::{Shim2, Shim4, ShimToken},
    vector::{width, Native, Vector},
};

#[cfg(target_arch = "x86")]
use core::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;
/// SSE instruction set token.
#[derive(Copy, Clone, Debug)]
pub struct Sse(());

/// AVX instruction set token.
#[derive(Copy, Clone, Debug)]
pub struct Avx(());

impl_token! { Sse => "sse4.1" }
impl_token! { Avx => "avx" }

impl core::convert::From<Avx> for Sse {
    #[inline]
    fn from(_: Avx) -> Sse {
        // Sound: every CPU that supports AVX also supports SSE4.1.
        unsafe { Sse::new_unchecked() }
    }
}
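
// Illustrative usage (a sketch, not part of the original source), assuming
// `impl_token!` generates the usual runtime detector `Token::new() -> Option<Self>`:
//
//     if let Some(avx) = Avx::new() {
//         let sse = Sse::from(avx); // free downcast: AVX proves SSE4.1
//     }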

impl Native<Sse> for f32 {
    type Width = width::W4;
}

impl Native<Sse> for f64 {
    type Width = width::W2;
}

impl Native<Avx> for f32 {
    type Width = width::W8;
}

impl Native<Avx> for f64 {
    type Width = width::W4;
}
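
// The native widths above mirror register sizes: a 128-bit SSE register holds
// 4 x f32 or 2 x f64, and a 256-bit AVX register holds 8 x f32 or 4 x f64.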

/// An SSE vector of `f32`s.
#[derive(Clone, Copy, Debug)]
#[repr(transparent)]
#[allow(non_camel_case_types)]
pub struct f32x4(__m128);

/// An SSE vector of `f64`s.
#[derive(Clone, Copy, Debug)]
#[repr(transparent)]
#[allow(non_camel_case_types)]
pub struct f64x2(__m128d);

/// An AVX vector of `f32`s.
#[derive(Clone, Copy, Debug)]
#[repr(transparent)]
#[allow(non_camel_case_types)]
pub struct f32x8(__m256);

/// An AVX vector of `f64`s.
#[derive(Clone, Copy, Debug)]
#[repr(transparent)]
#[allow(non_camel_case_types)]
pub struct f64x4(__m256d);
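
// `#[repr(transparent)]` gives each newtype exactly the layout of the
// intrinsic it wraps, so the `Underlying` associated types below and the
// slice views generated by `as_slice!` can rely on an identical in-memory
// representation.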

impl Scalar<Sse, width::W1> for f32 {
    type Vector = ShimToken<generic::f32x1, Self, Sse>;
}

impl Scalar<Sse, width::W2> for f32 {
    type Vector = ShimToken<Shim2<generic::f32x1, Self>, Self, Sse>;
}

impl Scalar<Sse, width::W4> for f32 {
    type Vector = f32x4;
}

impl Scalar<Sse, width::W8> for f32 {
    type Vector = Shim2<f32x4, f32>;
}

impl Scalar<Sse, width::W1> for f64 {
    type Vector = ShimToken<generic::f64x1, Self, Sse>;
}

impl Scalar<Sse, width::W2> for f64 {
    type Vector = f64x2;
}

impl Scalar<Sse, width::W4> for f64 {
    type Vector = Shim2<f64x2, f64>;
}

impl Scalar<Sse, width::W8> for f64 {
    type Vector = Shim4<f64x2, f64>;
}

impl Scalar<Avx, width::W1> for f32 {
    type Vector = ShimToken<generic::f32x1, Self, Avx>;
}

impl Scalar<Avx, width::W2> for f32 {
    type Vector = ShimToken<Shim2<generic::f32x1, Self>, Self, Avx>;
}

impl Scalar<Avx, width::W4> for f32 {
    type Vector = ShimToken<f32x4, Self, Avx>;
}

impl Scalar<Avx, width::W8> for f32 {
    type Vector = f32x8;
}

impl Scalar<Avx, width::W1> for f64 {
    type Vector = ShimToken<generic::f64x1, Self, Avx>;
}

impl Scalar<Avx, width::W2> for f64 {
    type Vector = ShimToken<f64x2, Self, Avx>;
}

impl Scalar<Avx, width::W4> for f64 {
    type Vector = f64x4;
}

impl Scalar<Avx, width::W8> for f64 {
    type Vector = Shim2<f64x4, f64>;
}
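
// Dispatch pattern: the native width maps straight to a hardware vector,
// sub-native widths fall back to the generic scalar implementation behind the
// token (`ShimToken`), and wider-than-native widths are arrays of native
// vectors (`Shim2` doubles the width, `Shim4` quadruples it).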

arithmetic_ops! {
    feature: Sse::new_unchecked(),
    for f32x4:
        add -> (_mm_add_ps),
        sub -> (_mm_sub_ps),
        mul -> (_mm_mul_ps),
        div -> (_mm_div_ps)
}

arithmetic_ops! {
    feature: Sse::new_unchecked(),
    for f64x2:
        add -> (_mm_add_pd),
        sub -> (_mm_sub_pd),
        mul -> (_mm_mul_pd),
        div -> (_mm_div_pd)
}

arithmetic_ops! {
    feature: Avx::new_unchecked(),
    for f32x8:
        add -> (_mm256_add_ps),
        sub -> (_mm256_sub_ps),
        mul -> (_mm256_mul_ps),
        div -> (_mm256_div_ps)
}

arithmetic_ops! {
    feature: Avx::new_unchecked(),
    for f64x4:
        add -> (_mm256_add_pd),
        sub -> (_mm256_sub_pd),
        mul -> (_mm256_mul_pd),
        div -> (_mm256_div_pd)
}

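// The `arithmetic_ops!` blocks above route `+`, `-`, `*`, and `/` to their
// intrinsics. Negation is implemented by hand: XOR against -0.0 (a mask with
// only the IEEE sign bit set) flips the sign of every lane in one bitwise
// instruction.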
impl core::ops::Neg for f32x4 {
    type Output = Self;

    #[inline]
    fn neg(self) -> Self {
        Self(unsafe { _mm_xor_ps(self.0, _mm_set1_ps(-0.)) })
    }
}

impl core::ops::Neg for f64x2 {
    type Output = Self;

    #[inline]
    fn neg(self) -> Self {
        Self(unsafe { _mm_xor_pd(self.0, _mm_set1_pd(-0.)) })
    }
}

impl core::ops::Neg for f32x8 {
    type Output = Self;

    #[inline]
    fn neg(self) -> Self {
        Self(unsafe { _mm256_xor_ps(self.0, _mm256_set1_ps(-0.)) })
    }
}

impl core::ops::Neg for f64x4 {
    type Output = Self;

    #[inline]
    fn neg(self) -> Self {
        Self(unsafe { _mm256_xor_pd(self.0, _mm256_set1_pd(-0.)) })
    }
}

as_slice! { f32x4 }
as_slice! { f32x8 }
as_slice! { f64x2 }
as_slice! { f64x4 }

unsafe impl Vector for f32x4 {
    type Scalar = f32;

    type Token = Sse;

    type Width = crate::vector::width::W4;

    type Underlying = __m128;

    #[inline]
    fn zeroed(_: Self::Token) -> Self {
        Self(unsafe { _mm_setzero_ps() })
    }

    #[inline]
    fn splat(_: Self::Token, from: Self::Scalar) -> Self {
        Self(unsafe { _mm_set1_ps(from) })
    }
}

unsafe impl Vector for f64x2 {
    type Scalar = f64;

    type Token = Sse;

    type Width = crate::vector::width::W2;

    type Underlying = __m128d;

    #[inline]
    fn zeroed(_: Self::Token) -> Self {
        Self(unsafe { _mm_setzero_pd() })
    }

    #[inline]
    fn splat(_: Self::Token, from: Self::Scalar) -> Self {
        Self(unsafe { _mm_set1_pd(from) })
    }
}

unsafe impl Vector for f32x8 {
    type Scalar = f32;

    type Token = Avx;

    type Width = crate::vector::width::W8;

    type Underlying = __m256;

    #[inline]
    fn zeroed(_: Self::Token) -> Self {
        Self(unsafe { _mm256_setzero_ps() })
    }

    #[inline]
    fn splat(_: Self::Token, from: Self::Scalar) -> Self {
        Self(unsafe { _mm256_set1_ps(from) })
    }
}

unsafe impl Vector for f64x4 {
    type Scalar = f64;

    type Token = Avx;

    type Width = crate::vector::width::W4;

    type Underlying = __m256d;

    #[inline]
    fn zeroed(_: Self::Token) -> Self {
        Self(unsafe { _mm256_setzero_pd() })
    }

    #[inline]
    fn splat(_: Self::Token, from: Self::Scalar) -> Self {
        Self(unsafe { _mm256_set1_pd(from) })
    }
}
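
// A minimal sanity check, sketched in (not part of the original module). It
// assumes `impl_token!` provides the runtime detector
// `Token::new() -> Option<Self>`; lane 0 is read back with the raw
// `_mm_cvtss_f32` intrinsic rather than guessing at the slice API generated
// by `as_slice!`.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn splat_add_neg_sse() {
        // Skip silently when the CPU lacks SSE4.1.
        if let Some(sse) = Sse::new() {
            let v = f32x4::splat(sse, 1.5);
            let sum = v + v; // provided by `arithmetic_ops!` (`_mm_add_ps`)
            let neg = -sum; // sign-bit XOR, see the `Neg` impl above
            assert_eq!(unsafe { _mm_cvtss_f32(sum.0) }, 3.0);
            assert_eq!(unsafe { _mm_cvtss_f32(neg.0) }, -3.0);
        }
    }
}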