1mod floating;
2mod integer;
3mod num;
4
5use crate::SimdElement;
6use core::arch::x86_64::*;
7
8unsafe impl SimdElement for i8 {
9 type Vector = __m256i;
10 const VECTOR_LEN: usize = 32;
11
12 #[inline(always)]
13 fn is_available() -> bool {
14 std::is_x86_feature_detected!("avx2")
15 }
16
17 #[inline(always)]
18 unsafe fn load(src: *const Self) -> Self::Vector {
19 unsafe { _mm256_loadu_si256(src as _) }
20 }
21
22 #[inline(always)]
23 unsafe fn load_partial(src: *const Self, len: usize) -> Self::Vector {
24 let mut buff = [0i8; Self::VECTOR_LEN];
25 unsafe {
26 src.copy_to_nonoverlapping(buff.as_mut_ptr(), len);
27 Self::load(buff.as_ptr())
28 }
29 }
30
31 #[inline(always)]
32 unsafe fn store(dst: *mut Self, src: Self::Vector) {
33 unsafe {
34 _mm256_storeu_si256(dst as _, src);
35 }
36 }
37
38 #[inline(always)]
39 unsafe fn store_partial(dst: *mut Self, src: Self::Vector, len: usize) {
40 let mut buff = [0i8; Self::VECTOR_LEN];
41 unsafe {
42 Self::store(buff.as_mut_ptr(), src);
43 buff.as_ptr().copy_to_nonoverlapping(dst, len);
44 }
45 }
46
47 #[inline(always)]
48 unsafe fn set(value: Self) -> Self::Vector {
49 unsafe { _mm256_set1_epi8(value) }
50 }
51}
52
53unsafe impl SimdElement for u8 {
54 type Vector = __m256i;
55 const VECTOR_LEN: usize = 32;
56
57 #[inline(always)]
58 fn is_available() -> bool {
59 std::is_x86_feature_detected!("avx2")
60 }
61
62 #[inline(always)]
63 unsafe fn load(src: *const Self) -> Self::Vector {
64 unsafe { _mm256_loadu_si256(src as _) }
65 }
66
67 #[inline(always)]
68 unsafe fn load_partial(src: *const Self, len: usize) -> Self::Vector {
69 let mut buff = [0u8; Self::VECTOR_LEN];
70 unsafe {
71 src.copy_to_nonoverlapping(buff.as_mut_ptr(), len);
72 Self::load(buff.as_ptr())
73 }
74 }
75
76 #[inline(always)]
77 unsafe fn store(dst: *mut Self, src: Self::Vector) {
78 unsafe {
79 _mm256_storeu_si256(dst as _, src);
80 }
81 }
82
83 #[inline(always)]
84 unsafe fn store_partial(dst: *mut Self, src: Self::Vector, len: usize) {
85 let mut buff = [0u8; Self::VECTOR_LEN];
86 unsafe {
87 Self::store(buff.as_mut_ptr(), src);
88 buff.as_ptr().copy_to_nonoverlapping(dst, len);
89 }
90 }
91
92 #[inline(always)]
93 unsafe fn set(value: Self) -> Self::Vector {
94 unsafe { _mm256_set1_epi8(value as i8) }
95 }
96}
97
98unsafe impl SimdElement for i16 {
99 type Vector = __m256i;
100 const VECTOR_LEN: usize = 16;
101
102 #[inline(always)]
103 fn is_available() -> bool {
104 std::is_x86_feature_detected!("avx2")
105 }
106
107 #[inline(always)]
108 unsafe fn load(src: *const Self) -> Self::Vector {
109 unsafe { _mm256_loadu_si256(src as _) }
110 }
111
112 #[inline(always)]
113 unsafe fn load_partial(src: *const Self, len: usize) -> Self::Vector {
114 let mut buff = [0i16; Self::VECTOR_LEN];
115 unsafe {
116 src.copy_to_nonoverlapping(buff.as_mut_ptr(), len);
117 Self::load(buff.as_ptr())
118 }
119 }
120
121 #[inline(always)]
122 unsafe fn store(dst: *mut Self, src: Self::Vector) {
123 unsafe {
124 _mm256_storeu_si256(dst as _, src);
125 }
126 }
127
128 #[inline(always)]
129 unsafe fn store_partial(dst: *mut Self, src: Self::Vector, len: usize) {
130 let mut buff = [0i16; Self::VECTOR_LEN];
131 unsafe {
132 Self::store(buff.as_mut_ptr(), src);
133 buff.as_ptr().copy_to_nonoverlapping(dst, len);
134 }
135 }
136
137 #[inline(always)]
138 unsafe fn set(value: Self) -> Self::Vector {
139 unsafe { _mm256_set1_epi16(value) }
140 }
141}
142
143unsafe impl SimdElement for u16 {
144 type Vector = __m256i;
145 const VECTOR_LEN: usize = 16;
146
147 #[inline(always)]
148 fn is_available() -> bool {
149 std::is_x86_feature_detected!("avx2")
150 }
151
152 #[inline(always)]
153 unsafe fn load(src: *const Self) -> Self::Vector {
154 unsafe { _mm256_loadu_si256(src as _) }
155 }
156
157 #[inline(always)]
158 unsafe fn load_partial(src: *const Self, len: usize) -> Self::Vector {
159 let mut buff = [0u16; Self::VECTOR_LEN];
160 unsafe {
161 src.copy_to_nonoverlapping(buff.as_mut_ptr(), len);
162 Self::load(buff.as_ptr())
163 }
164 }
165
166 #[inline(always)]
167 unsafe fn store(dst: *mut Self, src: Self::Vector) {
168 unsafe {
169 _mm256_storeu_si256(dst as _, src);
170 }
171 }
172
173 #[inline(always)]
174 unsafe fn store_partial(dst: *mut Self, src: Self::Vector, len: usize) {
175 let mut buff = [0u16; Self::VECTOR_LEN];
176 unsafe {
177 Self::store(buff.as_mut_ptr(), src);
178 buff.as_ptr().copy_to_nonoverlapping(dst, len);
179 }
180 }
181
182 #[inline(always)]
183 unsafe fn set(value: Self) -> Self::Vector {
184 unsafe { _mm256_set1_epi16(value as i16) }
185 }
186}
187
188unsafe impl SimdElement for i32 {
189 type Vector = __m256i;
190 const VECTOR_LEN: usize = 8;
191
192 #[inline(always)]
193 fn is_available() -> bool {
194 std::is_x86_feature_detected!("avx2")
195 }
196
197 #[inline(always)]
198 unsafe fn load(src: *const Self) -> Self::Vector {
199 unsafe { _mm256_loadu_si256(src as _) }
200 }
201
202 #[inline(always)]
203 unsafe fn load_partial(src: *const Self, len: usize) -> Self::Vector {
204 let mut buff = [0i32; Self::VECTOR_LEN];
205 unsafe {
206 src.copy_to_nonoverlapping(buff.as_mut_ptr(), len);
207 Self::load(buff.as_ptr())
208 }
209 }
210
211 #[inline(always)]
212 unsafe fn store(dst: *mut Self, src: Self::Vector) {
213 unsafe {
214 _mm256_storeu_si256(dst as _, src);
215 }
216 }
217
218 #[inline(always)]
219 unsafe fn store_partial(dst: *mut Self, src: Self::Vector, len: usize) {
220 let mut buff = [0i32; Self::VECTOR_LEN];
221 unsafe {
222 Self::store(buff.as_mut_ptr(), src);
223 buff.as_ptr().copy_to_nonoverlapping(dst, len);
224 }
225 }
226
227 #[inline(always)]
228 unsafe fn set(value: Self) -> Self::Vector {
229 unsafe { _mm256_set1_epi32(value) }
230 }
231}
232
233unsafe impl SimdElement for u32 {
234 type Vector = __m256i;
235 const VECTOR_LEN: usize = 8;
236
237 #[inline(always)]
238 fn is_available() -> bool {
239 std::is_x86_feature_detected!("avx2")
240 }
241
242 #[inline(always)]
243 unsafe fn load(src: *const Self) -> Self::Vector {
244 unsafe { _mm256_loadu_si256(src as _) }
245 }
246
247 #[inline(always)]
248 unsafe fn load_partial(src: *const Self, len: usize) -> Self::Vector {
249 let mut buff = [0u32; Self::VECTOR_LEN];
250 unsafe {
251 src.copy_to_nonoverlapping(buff.as_mut_ptr(), len);
252 Self::load(buff.as_ptr())
253 }
254 }
255
256 #[inline(always)]
257 unsafe fn store(dst: *mut Self, src: Self::Vector) {
258 unsafe {
259 _mm256_storeu_si256(dst as _, src);
260 }
261 }
262
263 #[inline(always)]
264 unsafe fn store_partial(dst: *mut Self, src: Self::Vector, len: usize) {
265 let mut buff = [0u32; Self::VECTOR_LEN];
266 unsafe {
267 Self::store(buff.as_mut_ptr(), src);
268 buff.as_ptr().copy_to_nonoverlapping(dst, len);
269 }
270 }
271
272 #[inline(always)]
273 unsafe fn set(value: Self) -> Self::Vector {
274 unsafe { _mm256_set1_epi32(value as i32) }
275 }
276}
277
278unsafe impl SimdElement for i64 {
279 type Vector = __m256i;
280 const VECTOR_LEN: usize = 4;
281
282 #[inline(always)]
283 fn is_available() -> bool {
284 std::is_x86_feature_detected!("avx2")
285 }
286
287 #[inline(always)]
288 unsafe fn load(src: *const Self) -> Self::Vector {
289 unsafe { _mm256_loadu_si256(src as _) }
290 }
291
292 #[inline(always)]
293 unsafe fn load_partial(src: *const Self, len: usize) -> Self::Vector {
294 let mut buff = [0i64; Self::VECTOR_LEN];
295 unsafe {
296 src.copy_to_nonoverlapping(buff.as_mut_ptr(), len);
297 Self::load(buff.as_ptr())
298 }
299 }
300
301 #[inline(always)]
302 unsafe fn store(dst: *mut Self, src: Self::Vector) {
303 unsafe {
304 _mm256_storeu_si256(dst as _, src);
305 }
306 }
307
308 #[inline(always)]
309 unsafe fn store_partial(dst: *mut Self, src: Self::Vector, len: usize) {
310 let mut buff = [0i64; Self::VECTOR_LEN];
311 unsafe {
312 Self::store(buff.as_mut_ptr(), src);
313 buff.as_ptr().copy_to_nonoverlapping(dst, len);
314 }
315 }
316
317 #[inline(always)]
318 unsafe fn set(value: Self) -> Self::Vector {
319 unsafe { _mm256_set1_epi64x(value) }
320 }
321}
322
323unsafe impl SimdElement for u64 {
324 type Vector = __m256i;
325 const VECTOR_LEN: usize = 4;
326
327 #[inline(always)]
328 fn is_available() -> bool {
329 std::is_x86_feature_detected!("avx2")
330 }
331
332 #[inline(always)]
333 unsafe fn load(src: *const Self) -> Self::Vector {
334 unsafe { _mm256_loadu_si256(src as _) }
335 }
336
337 #[inline(always)]
338 unsafe fn load_partial(src: *const Self, len: usize) -> Self::Vector {
339 let mut buff = [0u64; Self::VECTOR_LEN];
340 unsafe {
341 src.copy_to_nonoverlapping(buff.as_mut_ptr(), len);
342 Self::load(buff.as_ptr())
343 }
344 }
345
346 #[inline(always)]
347 unsafe fn store(dst: *mut Self, src: Self::Vector) {
348 unsafe {
349 _mm256_storeu_si256(dst as _, src);
350 }
351 }
352
353 #[inline(always)]
354 unsafe fn store_partial(dst: *mut Self, src: Self::Vector, len: usize) {
355 let mut buff = [0u64; Self::VECTOR_LEN];
356 unsafe {
357 Self::store(buff.as_mut_ptr(), src);
358 buff.as_ptr().copy_to_nonoverlapping(dst, len);
359 }
360 }
361
362 #[inline(always)]
363 unsafe fn set(value: Self) -> Self::Vector {
364 unsafe { _mm256_set1_epi64x(value as i64) }
365 }
366}
367
368unsafe impl SimdElement for f32 {
369 type Vector = __m256;
370 const VECTOR_LEN: usize = 8;
371
372 #[inline(always)]
373 fn is_available() -> bool {
374 std::is_x86_feature_detected!("avx2")
375 }
376
377 #[inline(always)]
378 unsafe fn load(src: *const Self) -> Self::Vector {
379 unsafe { _mm256_loadu_ps(src as _) }
380 }
381
382 #[inline(always)]
383 unsafe fn load_partial(src: *const Self, len: usize) -> Self::Vector {
384 let mut buff = [0.0f32; Self::VECTOR_LEN];
385 unsafe {
386 src.copy_to_nonoverlapping(buff.as_mut_ptr(), len);
387 Self::load(buff.as_ptr())
388 }
389 }
390
391 #[inline(always)]
392 unsafe fn store(dst: *mut Self, src: Self::Vector) {
393 unsafe {
394 _mm256_storeu_ps(dst as _, src);
395 }
396 }
397
398 #[inline(always)]
399 unsafe fn store_partial(dst: *mut Self, src: Self::Vector, len: usize) {
400 let mut buff = [0.0f32; Self::VECTOR_LEN];
401 unsafe {
402 Self::store(buff.as_mut_ptr(), src);
403 buff.as_ptr().copy_to_nonoverlapping(dst, len);
404 }
405 }
406
407 #[inline(always)]
408 unsafe fn set(value: Self) -> Self::Vector {
409 unsafe { _mm256_set1_ps(value) }
410 }
411}
412
413unsafe impl SimdElement for f64 {
414 type Vector = __m256d;
415 const VECTOR_LEN: usize = 4;
416
417 #[inline(always)]
418 fn is_available() -> bool {
419 std::is_x86_feature_detected!("avx2")
420 }
421
422 #[inline(always)]
423 unsafe fn load(src: *const Self) -> Self::Vector {
424 unsafe { _mm256_loadu_pd(src as _) }
425 }
426
427 #[inline(always)]
428 unsafe fn load_partial(src: *const Self, len: usize) -> Self::Vector {
429 let mut buff = [0.0f64; Self::VECTOR_LEN];
430 unsafe {
431 src.copy_to_nonoverlapping(buff.as_mut_ptr(), len);
432 Self::load(buff.as_ptr())
433 }
434 }
435
436 #[inline(always)]
437 unsafe fn store(dst: *mut Self, src: Self::Vector) {
438 unsafe {
439 _mm256_storeu_pd(dst as _, src);
440 }
441 }
442
443 #[inline(always)]
444 unsafe fn store_partial(dst: *mut Self, src: Self::Vector, len: usize) {
445 let mut buff = [0.0f64; Self::VECTOR_LEN];
446 unsafe {
447 Self::store(buff.as_mut_ptr(), src);
448 buff.as_ptr().copy_to_nonoverlapping(dst, len);
449 }
450 }
451
452 #[inline(always)]
453 unsafe fn set(value: Self) -> Self::Vector {
454 unsafe { _mm256_set1_pd(value) }
455 }
456}