//! AVX2 (`x86_64`) implementations of the crate's `SimdElement` trait for the
//! primitive integer and floating-point types, using 256-bit vectors.
1mod floating;
2mod integer;
3mod num;
4
5use crate::SimdElement;
6use core::arch::x86_64::*;
7
8unsafe impl SimdElement for i8 {
9    type Vector = __m256i;
10    const VECTOR_LEN: usize = 32;
11
12    #[inline(always)]
13    fn is_available() -> bool {
14        std::is_x86_feature_detected!("avx2")
15    }
16
17    #[inline(always)]
18    unsafe fn load(src: *const Self) -> Self::Vector {
19        unsafe { _mm256_loadu_si256(src as _) }
20    }
21
22    #[inline(always)]
23    unsafe fn load_partial(src: *const Self, len: usize) -> Self::Vector {
24        let mut buff = [0i8; Self::VECTOR_LEN];
25        unsafe {
26            src.copy_to_nonoverlapping(buff.as_mut_ptr(), len);
27            Self::load(buff.as_ptr())
28        }
29    }
30
31    #[inline(always)]
32    unsafe fn store(dst: *mut Self, src: Self::Vector) {
33        unsafe {
34            _mm256_storeu_si256(dst as _, src);
35        }
36    }
37
38    #[inline(always)]
39    unsafe fn store_partial(dst: *mut Self, src: Self::Vector, len: usize) {
40        let mut buff = [0i8; Self::VECTOR_LEN];
41        unsafe {
42            Self::store(buff.as_mut_ptr(), src);
43            buff.as_ptr().copy_to_nonoverlapping(dst, len);
44        }
45    }
46
47    #[inline(always)]
48    unsafe fn set(value: Self) -> Self::Vector {
49        unsafe { _mm256_set1_epi8(value) }
50    }
51}
52
53unsafe impl SimdElement for u8 {
54    type Vector = __m256i;
55    const VECTOR_LEN: usize = 32;
56
57    #[inline(always)]
58    fn is_available() -> bool {
59        std::is_x86_feature_detected!("avx2")
60    }
61
62    #[inline(always)]
63    unsafe fn load(src: *const Self) -> Self::Vector {
64        unsafe { _mm256_loadu_si256(src as _) }
65    }
66
67    #[inline(always)]
68    unsafe fn load_partial(src: *const Self, len: usize) -> Self::Vector {
69        let mut buff = [0u8; Self::VECTOR_LEN];
70        unsafe {
71            src.copy_to_nonoverlapping(buff.as_mut_ptr(), len);
72            Self::load(buff.as_ptr())
73        }
74    }
75
76    #[inline(always)]
77    unsafe fn store(dst: *mut Self, src: Self::Vector) {
78        unsafe {
79            _mm256_storeu_si256(dst as _, src);
80        }
81    }
82
83    #[inline(always)]
84    unsafe fn store_partial(dst: *mut Self, src: Self::Vector, len: usize) {
85        let mut buff = [0u8; Self::VECTOR_LEN];
86        unsafe {
87            Self::store(buff.as_mut_ptr(), src);
88            buff.as_ptr().copy_to_nonoverlapping(dst, len);
89        }
90    }
91
92    #[inline(always)]
93    unsafe fn set(value: Self) -> Self::Vector {
94        unsafe { _mm256_set1_epi8(value as i8) }
95    }
96}
97
98unsafe impl SimdElement for i16 {
99    type Vector = __m256i;
100    const VECTOR_LEN: usize = 16;
101
102    #[inline(always)]
103    fn is_available() -> bool {
104        std::is_x86_feature_detected!("avx2")
105    }
106
107    #[inline(always)]
108    unsafe fn load(src: *const Self) -> Self::Vector {
109        unsafe { _mm256_loadu_si256(src as _) }
110    }
111
112    #[inline(always)]
113    unsafe fn load_partial(src: *const Self, len: usize) -> Self::Vector {
114        let mut buff = [0i16; Self::VECTOR_LEN];
115        unsafe {
116            src.copy_to_nonoverlapping(buff.as_mut_ptr(), len);
117            Self::load(buff.as_ptr())
118        }
119    }
120
121    #[inline(always)]
122    unsafe fn store(dst: *mut Self, src: Self::Vector) {
123        unsafe {
124            _mm256_storeu_si256(dst as _, src);
125        }
126    }
127
128    #[inline(always)]
129    unsafe fn store_partial(dst: *mut Self, src: Self::Vector, len: usize) {
130        let mut buff = [0i16; Self::VECTOR_LEN];
131        unsafe {
132            Self::store(buff.as_mut_ptr(), src);
133            buff.as_ptr().copy_to_nonoverlapping(dst, len);
134        }
135    }
136
137    #[inline(always)]
138    unsafe fn set(value: Self) -> Self::Vector {
139        unsafe { _mm256_set1_epi16(value) }
140    }
141}
142
143unsafe impl SimdElement for u16 {
144    type Vector = __m256i;
145    const VECTOR_LEN: usize = 16;
146
147    #[inline(always)]
148    fn is_available() -> bool {
149        std::is_x86_feature_detected!("avx2")
150    }
151
152    #[inline(always)]
153    unsafe fn load(src: *const Self) -> Self::Vector {
154        unsafe { _mm256_loadu_si256(src as _) }
155    }
156
157    #[inline(always)]
158    unsafe fn load_partial(src: *const Self, len: usize) -> Self::Vector {
159        let mut buff = [0u16; Self::VECTOR_LEN];
160        unsafe {
161            src.copy_to_nonoverlapping(buff.as_mut_ptr(), len);
162            Self::load(buff.as_ptr())
163        }
164    }
165
166    #[inline(always)]
167    unsafe fn store(dst: *mut Self, src: Self::Vector) {
168        unsafe {
169            _mm256_storeu_si256(dst as _, src);
170        }
171    }
172
173    #[inline(always)]
174    unsafe fn store_partial(dst: *mut Self, src: Self::Vector, len: usize) {
175        let mut buff = [0u16; Self::VECTOR_LEN];
176        unsafe {
177            Self::store(buff.as_mut_ptr(), src);
178            buff.as_ptr().copy_to_nonoverlapping(dst, len);
179        }
180    }
181
182    #[inline(always)]
183    unsafe fn set(value: Self) -> Self::Vector {
184        unsafe { _mm256_set1_epi16(value as i16) }
185    }
186}
187
188unsafe impl SimdElement for i32 {
189    type Vector = __m256i;
190    const VECTOR_LEN: usize = 8;
191
192    #[inline(always)]
193    fn is_available() -> bool {
194        std::is_x86_feature_detected!("avx2")
195    }
196
197    #[inline(always)]
198    unsafe fn load(src: *const Self) -> Self::Vector {
199        unsafe { _mm256_loadu_si256(src as _) }
200    }
201
202    #[inline(always)]
203    unsafe fn load_partial(src: *const Self, len: usize) -> Self::Vector {
204        let mut buff = [0i32; Self::VECTOR_LEN];
205        unsafe {
206            src.copy_to_nonoverlapping(buff.as_mut_ptr(), len);
207            Self::load(buff.as_ptr())
208        }
209    }
210
211    #[inline(always)]
212    unsafe fn store(dst: *mut Self, src: Self::Vector) {
213        unsafe {
214            _mm256_storeu_si256(dst as _, src);
215        }
216    }
217
218    #[inline(always)]
219    unsafe fn store_partial(dst: *mut Self, src: Self::Vector, len: usize) {
220        let mut buff = [0i32; Self::VECTOR_LEN];
221        unsafe {
222            Self::store(buff.as_mut_ptr(), src);
223            buff.as_ptr().copy_to_nonoverlapping(dst, len);
224        }
225    }
226
227    #[inline(always)]
228    unsafe fn set(value: Self) -> Self::Vector {
229        unsafe { _mm256_set1_epi32(value) }
230    }
231}
232
233unsafe impl SimdElement for u32 {
234    type Vector = __m256i;
235    const VECTOR_LEN: usize = 8;
236
237    #[inline(always)]
238    fn is_available() -> bool {
239        std::is_x86_feature_detected!("avx2")
240    }
241
242    #[inline(always)]
243    unsafe fn load(src: *const Self) -> Self::Vector {
244        unsafe { _mm256_loadu_si256(src as _) }
245    }
246
247    #[inline(always)]
248    unsafe fn load_partial(src: *const Self, len: usize) -> Self::Vector {
249        let mut buff = [0u32; Self::VECTOR_LEN];
250        unsafe {
251            src.copy_to_nonoverlapping(buff.as_mut_ptr(), len);
252            Self::load(buff.as_ptr())
253        }
254    }
255
256    #[inline(always)]
257    unsafe fn store(dst: *mut Self, src: Self::Vector) {
258        unsafe {
259            _mm256_storeu_si256(dst as _, src);
260        }
261    }
262
263    #[inline(always)]
264    unsafe fn store_partial(dst: *mut Self, src: Self::Vector, len: usize) {
265        let mut buff = [0u32; Self::VECTOR_LEN];
266        unsafe {
267            Self::store(buff.as_mut_ptr(), src);
268            buff.as_ptr().copy_to_nonoverlapping(dst, len);
269        }
270    }
271
272    #[inline(always)]
273    unsafe fn set(value: Self) -> Self::Vector {
274        unsafe { _mm256_set1_epi32(value as i32) }
275    }
276}
277
278unsafe impl SimdElement for i64 {
279    type Vector = __m256i;
280    const VECTOR_LEN: usize = 4;
281
282    #[inline(always)]
283    fn is_available() -> bool {
284        std::is_x86_feature_detected!("avx2")
285    }
286
287    #[inline(always)]
288    unsafe fn load(src: *const Self) -> Self::Vector {
289        unsafe { _mm256_loadu_si256(src as _) }
290    }
291
292    #[inline(always)]
293    unsafe fn load_partial(src: *const Self, len: usize) -> Self::Vector {
294        let mut buff = [0i64; Self::VECTOR_LEN];
295        unsafe {
296            src.copy_to_nonoverlapping(buff.as_mut_ptr(), len);
297            Self::load(buff.as_ptr())
298        }
299    }
300
301    #[inline(always)]
302    unsafe fn store(dst: *mut Self, src: Self::Vector) {
303        unsafe {
304            _mm256_storeu_si256(dst as _, src);
305        }
306    }
307
308    #[inline(always)]
309    unsafe fn store_partial(dst: *mut Self, src: Self::Vector, len: usize) {
310        let mut buff = [0i64; Self::VECTOR_LEN];
311        unsafe {
312            Self::store(buff.as_mut_ptr(), src);
313            buff.as_ptr().copy_to_nonoverlapping(dst, len);
314        }
315    }
316
317    #[inline(always)]
318    unsafe fn set(value: Self) -> Self::Vector {
319        unsafe { _mm256_set1_epi64x(value) }
320    }
321}
322
323unsafe impl SimdElement for u64 {
324    type Vector = __m256i;
325    const VECTOR_LEN: usize = 4;
326
327    #[inline(always)]
328    fn is_available() -> bool {
329        std::is_x86_feature_detected!("avx2")
330    }
331
332    #[inline(always)]
333    unsafe fn load(src: *const Self) -> Self::Vector {
334        unsafe { _mm256_loadu_si256(src as _) }
335    }
336
337    #[inline(always)]
338    unsafe fn load_partial(src: *const Self, len: usize) -> Self::Vector {
339        let mut buff = [0u64; Self::VECTOR_LEN];
340        unsafe {
341            src.copy_to_nonoverlapping(buff.as_mut_ptr(), len);
342            Self::load(buff.as_ptr())
343        }
344    }
345
346    #[inline(always)]
347    unsafe fn store(dst: *mut Self, src: Self::Vector) {
348        unsafe {
349            _mm256_storeu_si256(dst as _, src);
350        }
351    }
352
353    #[inline(always)]
354    unsafe fn store_partial(dst: *mut Self, src: Self::Vector, len: usize) {
355        let mut buff = [0u64; Self::VECTOR_LEN];
356        unsafe {
357            Self::store(buff.as_mut_ptr(), src);
358            buff.as_ptr().copy_to_nonoverlapping(dst, len);
359        }
360    }
361
362    #[inline(always)]
363    unsafe fn set(value: Self) -> Self::Vector {
364        unsafe { _mm256_set1_epi64x(value as i64) }
365    }
366}
367
368unsafe impl SimdElement for f32 {
369    type Vector = __m256;
370    const VECTOR_LEN: usize = 8;
371
372    #[inline(always)]
373    fn is_available() -> bool {
374        std::is_x86_feature_detected!("avx2")
375    }
376
377    #[inline(always)]
378    unsafe fn load(src: *const Self) -> Self::Vector {
379        unsafe { _mm256_loadu_ps(src as _) }
380    }
381
382    #[inline(always)]
383    unsafe fn load_partial(src: *const Self, len: usize) -> Self::Vector {
384        let mut buff = [0.0f32; Self::VECTOR_LEN];
385        unsafe {
386            src.copy_to_nonoverlapping(buff.as_mut_ptr(), len);
387            Self::load(buff.as_ptr())
388        }
389    }
390
391    #[inline(always)]
392    unsafe fn store(dst: *mut Self, src: Self::Vector) {
393        unsafe {
394            _mm256_storeu_ps(dst as _, src);
395        }
396    }
397
398    #[inline(always)]
399    unsafe fn store_partial(dst: *mut Self, src: Self::Vector, len: usize) {
400        let mut buff = [0.0f32; Self::VECTOR_LEN];
401        unsafe {
402            Self::store(buff.as_mut_ptr(), src);
403            buff.as_ptr().copy_to_nonoverlapping(dst, len);
404        }
405    }
406
407    #[inline(always)]
408    unsafe fn set(value: Self) -> Self::Vector {
409        unsafe { _mm256_set1_ps(value) }
410    }
411}
412
413unsafe impl SimdElement for f64 {
414    type Vector = __m256d;
415    const VECTOR_LEN: usize = 4;
416
417    #[inline(always)]
418    fn is_available() -> bool {
419        std::is_x86_feature_detected!("avx2")
420    }
421
422    #[inline(always)]
423    unsafe fn load(src: *const Self) -> Self::Vector {
424        unsafe { _mm256_loadu_pd(src as _) }
425    }
426
427    #[inline(always)]
428    unsafe fn load_partial(src: *const Self, len: usize) -> Self::Vector {
429        let mut buff = [0.0f64; Self::VECTOR_LEN];
430        unsafe {
431            src.copy_to_nonoverlapping(buff.as_mut_ptr(), len);
432            Self::load(buff.as_ptr())
433        }
434    }
435
436    #[inline(always)]
437    unsafe fn store(dst: *mut Self, src: Self::Vector) {
438        unsafe {
439            _mm256_storeu_pd(dst as _, src);
440        }
441    }
442
443    #[inline(always)]
444    unsafe fn store_partial(dst: *mut Self, src: Self::Vector, len: usize) {
445        let mut buff = [0.0f64; Self::VECTOR_LEN];
446        unsafe {
447            Self::store(buff.as_mut_ptr(), src);
448            buff.as_ptr().copy_to_nonoverlapping(dst, len);
449        }
450    }
451
452    #[inline(always)]
453    unsafe fn set(value: Self) -> Self::Vector {
454        unsafe { _mm256_set1_pd(value) }
455    }
456}