Skip to main content

simulated_packed_simd/
lib.rs

1// Simulated SIMD on Rust Stable Channel
2// Inspired by crate `simd` developed for nightly Channel
3// Reference link : https://github.com/rust-lang-nursery/simd
4
5#![allow(clippy::too_many_arguments)]
6
7#![allow(non_camel_case_types)]
8use std::ops::{Add, Sub, Mul, Div, BitAnd, BitOr, BitXor, Not, Shl, Shr};
9
/// 2x32-bit vectors
///
/// Two `u32` lanes. `#[repr(packed)]` stores the lanes back to back and
/// lowers the alignment of the struct to 1.
#[repr(packed)]
#[derive(Debug, Clone, Copy)]
pub struct u32x2(u32, u32);

/// Two `i32` lanes, packed back to back.
#[repr(packed)]
#[derive(Debug, Clone, Copy)]
pub struct i32x2(i32, i32);

/// Two `f32` lanes, packed back to back.
#[repr(packed)]
#[derive(Debug, Clone, Copy)]
pub struct f32x2(f32, f32);

/// Mask with two `i32` lanes; a lane is treated as true when it is non-zero.
#[repr(packed)]
#[derive(Debug, Clone, Copy)]
pub struct bool32x2(i32, i32);
26
/// 4x8-bit vectors
///
/// Four `u8` lanes, packed back to back.
#[repr(packed)]
#[derive(Debug, Clone, Copy)]
pub struct u8x4(u8, u8, u8, u8);

/// 4x8-bit vectors
///
/// Four `i8` lanes, packed back to back.
#[repr(packed)]
#[derive(Debug, Clone, Copy)]
pub struct i8x4(i8, i8, i8, i8);

/// 4x8-bit vectors
///
/// Mask with four `i8` lanes; a lane is treated as true when it is non-zero.
#[repr(packed)]
#[derive(Debug, Clone, Copy)]
pub struct bool8x4(i8, i8, i8, i8);
41
/// 4x16-bit vectors
///
/// Four `u16` lanes, packed back to back.
#[repr(packed)]
#[derive(Debug, Clone, Copy)]
pub struct u16x4(u16, u16, u16, u16);

/// 4x16-bit vectors
///
/// Four `i16` lanes, packed back to back.
#[repr(packed)]
#[derive(Debug, Clone, Copy)]
pub struct i16x4(i16, i16, i16, i16);

/// 4x16-bit vectors
///
/// Mask with four `i16` lanes; a lane is treated as true when it is non-zero.
#[repr(packed)]
#[derive(Debug, Clone, Copy)]
pub struct bool16x4(i16, i16, i16, i16);
56
/// 4x32-bit vectors
///
/// Four `u32` lanes, packed back to back.
#[repr(packed)]
#[derive(Debug, Clone, Copy)]
pub struct u32x4(u32, u32, u32, u32);

/// Four `i32` lanes, packed back to back.
#[repr(packed)]
#[derive(Debug, Clone, Copy)]
pub struct i32x4(i32, i32, i32, i32);

/// Four `f32` lanes, packed back to back.
#[repr(packed)]
#[derive(Debug, Clone, Copy)]
pub struct f32x4(f32, f32, f32, f32);

/// Mask with four `i32` lanes; a lane is treated as true when it is non-zero.
#[repr(packed)]
#[derive(Debug, Clone, Copy)]
pub struct bool32x4(i32, i32, i32, i32);
73
/// 8x8-bit integer vectors
///
/// Eight `u8` lanes, packed back to back.
#[repr(packed)]
#[derive(Debug, Clone, Copy)]
pub struct u8x8(u8, u8, u8, u8,
                 u8, u8, u8, u8);

/// 8x8-bit integer vectors
///
/// Eight `i8` lanes, packed back to back.
#[repr(packed)]
#[derive(Debug, Clone, Copy)]
pub struct i8x8(i8, i8, i8, i8,
                 i8, i8, i8, i8);
85
/// 8x16-bit integer vectors
///
/// Eight `u16` lanes, packed back to back.
#[repr(packed)]
#[derive(Debug, Clone, Copy)]
pub struct u16x8(u16, u16, u16, u16,
                 u16, u16, u16, u16);

/// Eight `i16` lanes, packed back to back.
#[repr(packed)]
#[derive(Debug, Clone, Copy)]
pub struct i16x8(i16, i16, i16, i16,
                 i16, i16, i16, i16);
96                 
/// Mask with eight `i8` lanes; a lane is treated as true when it is non-zero.
#[repr(packed)]
#[derive(Debug, Clone, Copy)]
pub struct bool8x8(i8, i8, i8, i8,
                     i8, i8, i8, i8);

/// Mask with eight `i16` lanes; a lane is treated as true when it is non-zero.
#[repr(packed)]
#[derive(Debug, Clone, Copy)]
pub struct bool16x8(i16, i16, i16, i16,
                     i16, i16, i16, i16);
106
/// 16x8-bit integer vectors
///
/// Sixteen `u8` lanes, packed back to back.
#[repr(packed)]
#[derive(Debug, Clone, Copy)]
pub struct u8x16(u8, u8, u8, u8, u8, u8, u8, u8,
                 u8, u8, u8, u8, u8, u8, u8, u8);

/// Sixteen `i8` lanes, packed back to back.
#[repr(packed)]
#[derive(Debug, Clone, Copy)]
pub struct i8x16(i8, i8, i8, i8, i8, i8, i8, i8,
                 i8, i8, i8, i8, i8, i8, i8, i8);
                 
/// Mask with sixteen `i8` lanes; a lane is treated as true when it is non-zero.
#[repr(packed)]
#[derive(Debug, Clone, Copy)]
pub struct bool8x16(i8, i8, i8, i8, i8, i8, i8, i8,
                     i8, i8, i8, i8, i8, i8, i8, i8);
122
123
/// 2x64-bit vectors
///
/// Two `u64` lanes, packed back to back.
#[repr(packed)]
#[derive(Debug, Clone, Copy)]
pub struct u64x2(u64, u64);

/// Two `i64` lanes, packed back to back.
#[repr(packed)]
#[derive(Debug, Clone, Copy)]
pub struct i64x2(i64, i64);

/// Two `f64` lanes, packed back to back.
///
/// Unlike the other vectors in this file, both lanes are public fields.
#[repr(packed)]
#[derive(Debug, Clone, Copy)]
pub struct f64x2(pub f64, pub f64);

/// Mask with two `i64` lanes; a lane is treated as true when it is non-zero.
#[repr(packed)]
#[derive(Debug, Clone, Copy)]
pub struct bool64x2(i64, i64);
140
141
/// 4x64-bit vectors
///
/// Four `u64` lanes, packed back to back.
#[repr(packed)]
#[derive(Debug, Clone, Copy)]
pub struct u64x4(u64, u64, u64, u64);

/// Four `i64` lanes, packed back to back.
#[repr(packed)]
#[derive(Debug, Clone, Copy)]
pub struct i64x4(i64, i64, i64, i64);

/// Four `f64` lanes, packed back to back.
#[repr(packed)]
#[derive(Debug, Clone, Copy)]
pub struct f64x4(f64, f64, f64, f64);

/// Mask with four `i64` lanes; a lane is treated as true when it is non-zero.
#[repr(packed)]
#[derive(Debug, Clone, Copy)]
pub struct bool64x4(i64, i64, i64, i64);
158
159
/// 8x32-bit vectors
///
/// Eight `u32` lanes, packed back to back.
#[repr(packed)]
#[derive(Debug, Clone, Copy)]
pub struct u32x8(u32, u32, u32, u32,
                 u32, u32, u32, u32);
                 
/// Eight `i32` lanes, packed back to back.
#[repr(packed)]
#[derive(Debug, Clone, Copy)]
pub struct i32x8(i32, i32, i32, i32,
                 i32, i32, i32, i32);
                 
/// Eight `f32` lanes, packed back to back.
#[repr(packed)]
#[derive(Debug, Clone, Copy)]
pub struct f32x8(f32, f32, f32, f32,
                 f32, f32, f32, f32);

/// Mask with eight `i32` lanes; a lane is treated as true when it is non-zero.
#[repr(packed)]
#[derive(Debug, Clone, Copy)]
pub struct bool32x8(i32, i32, i32, i32,
                     i32, i32, i32, i32);
180                  
/// 16x16-bit integer vectors
///
/// Sixteen `u16` lanes, packed back to back.
#[repr(packed)]
#[derive(Debug, Clone, Copy)]
pub struct u16x16(u16, u16, u16, u16, u16, u16, u16, u16,
                  u16, u16, u16, u16, u16, u16, u16, u16);
                  
/// Sixteen `i16` lanes, packed back to back.
#[repr(packed)]
#[derive(Debug, Clone, Copy)]
pub struct i16x16(i16, i16, i16, i16, i16, i16, i16, i16,
                  i16, i16, i16, i16, i16, i16, i16, i16);

/// Mask with sixteen `i16` lanes; a lane is treated as true when it is non-zero.
#[repr(packed)]
#[derive(Debug, Clone, Copy)]
pub struct bool16x16(i16, i16, i16, i16, i16, i16, i16, i16,
                      i16, i16, i16, i16, i16, i16, i16, i16);
196
/// 32x8-bit integer vector
///
/// Thirty-two `u8` lanes, packed back to back.
#[repr(packed)]
#[derive(Debug, Clone, Copy)]
pub struct u8x32(u8, u8, u8, u8, u8, u8, u8, u8,
                 u8, u8, u8, u8, u8, u8, u8, u8,
                 u8, u8, u8, u8, u8, u8, u8, u8,
                 u8, u8, u8, u8, u8, u8, u8, u8);
                 
/// Thirty-two `i8` lanes, packed back to back.
#[repr(packed)]
#[derive(Debug, Clone, Copy)]
pub struct i8x32(i8, i8, i8, i8, i8, i8, i8, i8,
                 i8, i8, i8, i8, i8, i8, i8, i8,
                 i8, i8, i8, i8, i8, i8, i8, i8,
                 i8, i8, i8, i8, i8, i8, i8, i8);

/// Mask with thirty-two `i8` lanes; a lane is treated as true when it is non-zero.
#[repr(packed)]
#[derive(Debug, Clone, Copy)]
pub struct bool8x32(i8, i8, i8, i8, i8, i8, i8, i8,
                    i8, i8, i8, i8, i8, i8, i8, i8,
                    i8, i8, i8, i8, i8, i8, i8, i8,
                    i8, i8, i8, i8, i8, i8, i8, i8);
218
// Generates the slice load/store methods of a vector type.
//
// Must be invoked inside an `impl $name { ... }` block. `$elem` is the lane
// type and the `$index` list names every tuple field of `$name` in order.
// The "aligned" and "unaligned" variants behave identically here: lanes are
// copied one at a time, so no alignment requirement is placed on the slice.
macro_rules! impl_load_store {
    ($name: ident : $elem: ident, $($index:tt),*) => {
            /// Builds a vector from consecutive elements of `array`, lane `i`
            /// taking `array[idx + i]`.
            ///
            /// # Panics
            ///
            /// Panics if `array` does not hold one element per lane starting
            /// at `idx`.
            #[inline(always)]
            pub fn load(array: &[$elem], idx: usize) -> Self {
                $name($(array[idx + $index]),*)
            }

            /// Instantiates a new vector with the first values of the slice.
            ///
            /// # Panics
            ///
            /// Panics if `slice` has fewer elements than the vector has lanes.
            #[inline(always)]
            pub fn from_slice_aligned(slice: &[$elem]) -> Self {
                $name($(slice[$index]),*)
            }

            /// Instantiates a new vector with the first values of the slice.
            ///
            /// Identical to `from_slice_aligned`.
            ///
            /// # Panics
            ///
            /// Panics if `slice` has fewer elements than the vector has lanes.
            #[inline(always)]
            pub fn from_slice_unaligned(slice: &[$elem]) -> Self {
                Self::from_slice_aligned(slice)
            }

            /// Instantiates a new vector with the first values of the slice,
            /// without bounds checks.
            ///
            /// # Safety
            ///
            /// `slice` must hold at least as many elements as the vector has
            /// lanes.
            #[inline(always)]
            pub unsafe fn from_slice_aligned_unchecked(slice: &[$elem]) -> Self {
                // SAFETY: the caller guarantees one element per lane, so
                // every lane index is in bounds.
                unsafe { $name($(*slice.get_unchecked($index)),*) }
            }

            /// Instantiates a new vector with the first values of the slice,
            /// without bounds checks.
            ///
            /// Identical to `from_slice_aligned_unchecked`.
            ///
            /// # Safety
            ///
            /// `slice` must hold at least as many elements as the vector has
            /// lanes.
            #[inline(always)]
            pub unsafe fn from_slice_unaligned_unchecked(slice: &[$elem]) -> Self {
                // SAFETY: same contract as the callee, forwarded unchanged.
                unsafe { Self::from_slice_aligned_unchecked(slice) }
            }

            /// Stores the lanes into `array`, lane `i` going to
            /// `array[idx + i]`.
            ///
            /// # Panics
            ///
            /// Panics if `array` does not have room for one element per lane
            /// starting at `idx`.
            #[inline(always)]
            pub fn store(self, array: &mut [$elem], idx: usize) {
                $(array[idx + $index] = self.$index;)*
            }

            /// Writes the values of the vector to the start of the slice.
            ///
            /// # Panics
            ///
            /// Panics if `slice` has fewer elements than the vector has lanes.
            #[inline(always)]
            pub fn write_to_slice_aligned(self, slice: &mut [$elem]) {
                $(slice[$index] = self.$index;)*
            }

            /// Writes the values of the vector to the start of the slice.
            ///
            /// Identical to `write_to_slice_aligned`.
            ///
            /// # Panics
            ///
            /// Panics if `slice` has fewer elements than the vector has lanes.
            #[inline(always)]
            pub fn write_to_slice_unaligned(self, slice: &mut [$elem]) {
                self.write_to_slice_aligned(slice)
            }

            /// Writes the values of the vector to the start of the slice,
            /// without bounds checks.
            ///
            /// # Safety
            ///
            /// `slice` must hold at least as many elements as the vector has
            /// lanes.
            #[inline(always)]
            pub unsafe fn write_to_slice_aligned_unchecked(self, slice: &mut [$elem]) {
                // SAFETY: the caller guarantees one element per lane, so
                // every lane index is in bounds.
                unsafe { $(*slice.get_unchecked_mut($index) = self.$index;)* }
            }

            /// Writes the values of the vector to the start of the slice,
            /// without bounds checks.
            ///
            /// Identical to `write_to_slice_aligned_unchecked`.
            ///
            /// # Safety
            ///
            /// `slice` must hold at least as many elements as the vector has
            /// lanes.
            #[inline(always)]
            pub unsafe fn write_to_slice_unaligned_unchecked(self, slice: &mut [$elem]) {
                // SAFETY: same contract as the callee, forwarded unchanged.
                unsafe { self.write_to_slice_aligned_unchecked(slice) }
            }
    }
}
278
// Generates the inherent API shared by every numeric vector type.
//
// Each entry has the form
// `Vector:lane_type, Mask:mask_lane_type, lane_count, 0:x0, 1:x1, ...;`
// where every `index:name` pair couples a tuple-field index with the name of
// the matching `new` parameter.
macro_rules! basic_impls {
    ($(
        $name: ident : $elem: ident, 
        $bool_name: ident : $bool_elem: ident, 
        $length: expr,
        $($index:tt : $field:ident),*;
    )*) => {
        
        $(impl $name {
            /// Create new instance from one value per lane
            #[inline(always)]
            pub fn new($($field: $elem),*) -> Self {
                $name($($field),*)
            }
            
            /// Create new instance with all lanes set to a value
            #[inline(always)]
            pub fn splat(x: $elem) -> Self {
                Self {$($index : x),*}
            }
            
            /// Get the `idx`th lane value
            ///
            /// # Panics
            ///
            /// Panics if `idx` is not smaller than the number of lanes.
            #[inline(always)]
            pub fn extract(self, idx: usize) -> $elem {
                assert!(idx < $length);
                // The struct is `#[repr(packed)]` (alignment 1), so reading a
                // lane through a `*const $elem` would be an unaligned access.
                // Copy the lanes by value into a properly aligned array.
                let lanes = [$(self.$index),*];
                lanes[idx]
            }
            
            /// Replace the `idx`th lane with new value
            ///
            /// Returns the modified copy.
            ///
            /// # Panics
            ///
            /// Panics if `idx` is not smaller than the number of lanes.
            #[inline(always)]
            pub fn replace(self, idx: usize, elem: $elem) -> Self {
                assert!(idx < $length);
                // Same reasoning as in `extract`: edit an aligned copy of the
                // lanes instead of writing through a possibly unaligned
                // pointer.
                let mut lanes = [$(self.$index),*];
                lanes[idx] = elem;
                $name($(lanes[$index]),*)
            }

            impl_load_store!{$name: $elem, $($index),*}

            /// Compare if equal; each mask lane is 1 when true, 0 otherwise
            #[inline(always)]
            pub fn eq(self, rhs: Self) -> $bool_name {
                $bool_name($((self.$index == rhs.$index) as $bool_elem),*)
            }
            
            /// Compare if not equal; each mask lane is 1 when true, 0 otherwise
            #[inline(always)]
            pub fn ne(self, rhs: Self) -> $bool_name {
                $bool_name($((self.$index != rhs.$index) as $bool_elem),*)
            }
            
            /// Compare if less than; each mask lane is 1 when true, 0 otherwise
            #[inline(always)]
            pub fn lt(self, rhs: Self) -> $bool_name {
                $bool_name($((self.$index < rhs.$index) as $bool_elem),*)
            }
            
            /// Compare if less than or equal; each mask lane is 1 when true, 0 otherwise
            #[inline(always)]
            pub fn le(self, rhs: Self) -> $bool_name {
                $bool_name($((self.$index <= rhs.$index) as $bool_elem),*)
            }
            
            /// Compare if greater than; each mask lane is 1 when true, 0 otherwise
            #[inline(always)]
            pub fn gt(self, rhs: Self) -> $bool_name {
                $bool_name($((self.$index > rhs.$index) as $bool_elem),*)
            }
            
            /// Compare if greater than or equal; each mask lane is 1 when true, 0 otherwise
            #[inline(always)]
            pub fn ge(self, rhs: Self) -> $bool_name {
                $bool_name($((self.$index >= rhs.$index) as $bool_elem),*)
            }
            
            /// Get max values by lane
            ///
            /// A lane takes `self`'s value only when it is strictly greater,
            /// otherwise `rhs`'s value.
            #[inline(always)]
            pub fn max(self, rhs: Self) -> Self {
                $name($(if self.$index > rhs.$index { self.$index } else {rhs.$index}),*)
            }
            
            /// Get min values by lane
            ///
            /// A lane takes `self`'s value only when it is strictly smaller,
            /// otherwise `rhs`'s value.
            #[inline(always)]
            pub fn min(self, rhs: Self) -> Self {
                $name($(if self.$index < rhs.$index { self.$index } else {rhs.$index}),*)
            }
        })*
    }
}
374
375basic_impls! {
376    u32x2:u32, bool32x2:i32, 2, 0:x0, 1:x1;
377    i32x2:i32, bool32x2:i32, 2, 0:x0, 1:x1;
378    f32x2:f32, bool32x2:i32, 2, 0:x0, 1:x1;
379    
380     u8x4:u8 ,  bool8x4:i8 , 4, 0:x0, 1:x1, 2:x2, 3:x3;
381     i8x4:i8 ,  bool8x4:i8 , 4, 0:x0, 1:x1, 2:x2, 3:x3;
382    u16x4:u16, bool16x4:i16, 4, 0:x0, 1:x1, 2:x2, 3:x3;
383    i16x4:i16, bool16x4:i16, 4, 0:x0, 1:x1, 2:x2, 3:x3;
384    u32x4:u32, bool32x4:i32, 4, 0:x0, 1:x1, 2:x2, 3:x3;
385    i32x4:i32, bool32x4:i32, 4, 0:x0, 1:x1, 2:x2, 3:x3;
386    f32x4:f32, bool32x4:i32, 4, 0:x0, 1:x1, 2:x2, 3:x3;
387    
388     u8x8:u8 , bool8x8 :i8 , 8, 0:x0, 1:x1, 2:x2, 3:x3, 4:x4, 5:x5, 6:x6, 7:x7;
389     i8x8:i8 , bool8x8 :i8 , 8, 0:x0, 1:x1, 2:x2, 3:x3, 4:x4, 5:x5, 6:x6, 7:x7;
390    u16x8:u16, bool16x8:i16, 8, 0:x0, 1:x1, 2:x2, 3:x3, 4:x4, 5:x5, 6:x6, 7:x7;
391    i16x8:i16, bool16x8:i16, 8, 0:x0, 1:x1, 2:x2, 3:x3, 4:x4, 5:x5, 6:x6, 7:x7;
392   
393    u64x2:u64, bool64x2:i64, 2, 0:x0, 1:x1;
394    i64x2:i64, bool64x2:i64, 2, 0:x0, 1:x1;
395    f64x2:f64, bool64x2:i64, 2, 0:x0, 1:x1;
396    
397    u64x4:u64, bool64x4:i64, 4, 0:x0, 1:x1 ,2:x2, 3:x3;
398    i64x4:i64, bool64x4:i64, 4, 0:x0, 1:x1 ,2:x2, 3:x3;
399    f64x4:f64, bool64x4:i64, 4, 0:x0, 1:x1 ,2:x2, 3:x3;
400    
401    u32x8:u32, bool32x8:i32, 8, 0:x0, 1:x1 ,2:x2, 3:x3, 4:x4, 5:x5 ,6:x6, 7:x7;
402    i32x8:i32, bool32x8:i32, 8, 0:x0, 1:x1 ,2:x2, 3:x3, 4:x4, 5:x5 ,6:x6, 7:x7;
403    f32x8:f32, bool32x8:i32, 8, 0:x0, 1:x1 ,2:x2, 3:x3, 4:x4, 5:x5 ,6:x6, 7:x7;
404    
405    u16x16:u16, bool16x16:i16, 16,  0:x0, 1:x1 , 2:x2, 3:x3, 4:x4, 5:x5, 6:x6, 7:x7,
406                                    8:x8, 9:x9 , 10:x10, 11:x11, 12:x12, 13:x13 ,14:x14, 15:x15;
407                               
408    i16x16:i16, bool16x16:i16, 16,  0:x0, 1:x1 , 2:x2, 3:x3, 4:x4, 5:x5, 6:x6, 7:x7,
409                                    8:x8, 9:x9 , 10:x10, 11:x11, 12:x12, 13:x13 ,14:x14, 15:x15;
410                               
411    u8x32:u8, bool8x32:i8, 32,      0:x0, 1:x1 , 2:x2, 3:x3, 4:x4, 5:x5, 6:x6, 7:x7,
412                                    8:x8, 9:x9 , 10:x10, 11:x11, 12:x12, 13:x13 ,14:x14, 15:x15,
413                                    16:x16, 17:x17 , 18:x18, 19:x19, 20:x20, 21:x21 ,22:x22, 23:x23,
414                                    24:x24, 25:x25 , 26:x26, 27:x27, 28:x28, 29:x29 ,30:x30, 31:x31;
415                            
416    i8x32:i8, bool8x32:i8, 32,      0:x0, 1:x1 , 2:x2, 3:x3, 4:x4, 5:x5, 6:x6, 7:x7,
417                                    8:x8, 9:x9 , 10:x10, 11:x11, 12:x12, 13:x13 ,14:x14, 15:x15,
418                                    16:x16, 17:x17 , 18:x18, 19:x19, 20:x20, 21:x21 ,22:x22, 23:x23,
419                                    24:x24, 25:x25 , 26:x26, 27:x27, 28:x28, 29:x29 ,30:x30, 31:x31;
420}
421
// Generates `select` and the arithmetic / bitwise / shift operators for the
// integer vector types.
//
// Each entry has the form `Vector:lane_type, 0:x0, 1:x1, ...;`. Only the
// field indices of the `index:name` pairs are used here.
macro_rules! int_impls {
    ($(
        $name: ident : $elem: ident, 
        $($index:tt : $field:ident),*;
     )*) => {
        $(
        impl $name {
            /// Bitwise select between `then` and `else_`, using `self` as
            /// the mask.
            ///
            /// Every result bit comes from `then` where the matching bit of
            /// `self` is set and from `else_` where it is clear. This only
            /// acts as a per-lane `if self.N { then.N } else { else_.N }`
            /// when each lane of `self` is either all ones or zero.
            #[inline(always)]
            pub fn select(&self, then: Self, else_ : Self) -> Self {
                $name($((self.$index & then.$index) | (!self.$index & else_.$index)),*)
            }
        }

        /// Add trait (+)
        /// The addition wraps around if it goes over the type limits
        impl Add for $name {
            type Output = Self;
            #[inline(always)]
            fn add(self, rhs: Self) -> Self {
                $name($(self.$index.wrapping_add(rhs.$index)),*)
            }
        }

        /// Sub trait (-)
        /// The subtraction wraps around if it goes over the type limits
        impl Sub for $name {
            type Output = Self;
            #[inline(always)]
            fn sub(self, rhs: Self) -> Self {
                $name($(self.$index.wrapping_sub(rhs.$index)),*)
            }
        }

        /// Mul trait (*)
        /// The multiplication wraps around if it goes over the type limits
        impl Mul for $name {
            type Output = Self;
            #[inline(always)]
            fn mul(self, rhs: Self) -> Self {
                $name($(self.$index.wrapping_mul(rhs.$index)),*)
            }
        }

        /// Div trait (/)
        /// The division wraps around on overflow and panics if any lane of
        /// the divisor is zero
        impl Div for $name {
            type Output = Self;
            #[inline(always)]
            fn div(self, rhs: Self) -> Self {
                $name($(self.$index.wrapping_div(rhs.$index)),*)
            }
        }

        /// BitAnd trait (&)
        impl BitAnd for $name {
            type Output = Self;
            #[inline(always)]
            fn bitand(self, rhs: Self) -> Self {
                $name($(self.$index & rhs.$index),*)
            }
        }

        /// BitOr trait (|)
        impl BitOr for $name {
            type Output = Self;
            #[inline(always)]
            fn bitor(self, rhs: Self) -> Self {
                $name($(self.$index | rhs.$index),*)
            }
        }

        /// BitXor trait (^)
        impl BitXor for $name {
            type Output = Self;
            #[inline(always)]
            fn bitxor(self, rhs: Self) -> Self {
                $name($(self.$index ^ rhs.$index),*)
            }
        }

        /// Not trait (!)
        impl Not for $name {
            type Output = Self;

            #[inline(always)]
            fn not(self) -> Self {
              $name($(!self.$index),*)
            }
        }

        /// Shift left trait (<<)
        /// The shift amount is taken modulo the lane width in bits
        impl Shl<u32> for $name {
            type Output = Self;
            #[inline(always)]
            fn shl(self, sz: u32) -> Self {
                // `wrapping_shl` takes the `u32` amount directly, so no
                // truncating cast to the lane type is needed, and debug and
                // release builds agree for oversized amounts.
                $name($(self.$index.wrapping_shl(sz)),*)
            }
        }

        /// Shift right trait (>>)
        /// The shift amount is taken modulo the lane width in bits
        impl Shr<u32> for $name {
            type Output = Self;
            #[inline(always)]
            fn shr(self, sz: u32) -> Self {
                // See `shl` for why the wrapping form is used.
                $name($(self.$index.wrapping_shr(sz)),*)
            }
        }
        )*
    }
}
535
536int_impls! {
537    u32x2:u32,  0:x0, 1:x1;
538    i32x2:i32,  0:x0, 1:x1;
539
540     u8x4:u8 ,  0:x0, 1:x1, 2:x2, 3:x3;
541     i8x4:i8 ,  0:x0, 1:x1, 2:x2, 3:x3;
542    u16x4:u16,  0:x0, 1:x1, 2:x2, 3:x3;
543    i16x4:i16,  0:x0, 1:x1, 2:x2, 3:x3;
544    u32x4:u32,  0:x0, 1:x1, 2:x2, 3:x3;
545    i32x4:i32,  0:x0, 1:x1, 2:x2, 3:x3;
546
547     u8x8:u8 ,  0:x0, 1:x1, 2:x2, 3:x3, 4:x4, 5:x5, 6:x6, 7:x7;
548     i8x8:i8 ,  0:x0, 1:x1, 2:x2, 3:x3, 4:x4, 5:x5, 6:x6, 7:x7;
549    u16x8:u16,  0:x0, 1:x1, 2:x2, 3:x3, 4:x4, 5:x5, 6:x6, 7:x7;
550    i16x8:i16,  0:x0, 1:x1, 2:x2, 3:x3, 4:x4, 5:x5, 6:x6, 7:x7;
551
552    u64x2:u64,  0:x0, 1:x1;
553    i64x2:i64,  0:x0, 1:x1;
554    
555    u64x4:u64,  0:x0, 1:x1 ,2:x2, 3:x3;
556    i64x4:i64,  0:x0, 1:x1 ,2:x2, 3:x3;
557    
558    u32x8:u32,  0:x0, 1:x1 ,2:x2, 3:x3, 4:x4, 5:x5 ,6:x6, 7:x7;
559    i32x8:i32,  0:x0, 1:x1 ,2:x2, 3:x3, 4:x4, 5:x5 ,6:x6, 7:x7;
560    
561    u16x16:u16, 0:x0, 1:x1 , 2:x2, 3:x3, 4:x4, 5:x5, 6:x6, 7:x7,
562                8:x8, 9:x9 , 10:x10, 11:x11, 12:x12, 13:x13 ,14:x14, 15:x15;
563                
564    i16x16:i16, 0:x0, 1:x1 , 2:x2, 3:x3, 4:x4, 5:x5, 6:x6, 7:x7,
565                8:x8, 9:x9 , 10:x10, 11:x11, 12:x12, 13:x13 ,14:x14, 15:x15;
566                               
567    u8x32:u8,   0:x0, 1:x1 , 2:x2, 3:x3, 4:x4, 5:x5, 6:x6, 7:x7,
568                8:x8, 9:x9 , 10:x10, 11:x11, 12:x12, 13:x13 ,14:x14, 15:x15,
569                16:x16, 17:x17 , 18:x18, 19:x19, 20:x20, 21:x21 ,22:x22, 23:x23,
570                24:x24, 25:x25 , 26:x26, 27:x27, 28:x28, 29:x29 ,30:x30, 31:x31;
571                
572    i8x32:i8,   0:x0, 1:x1 , 2:x2, 3:x3, 4:x4, 5:x5, 6:x6, 7:x7,
573                8:x8, 9:x9 , 10:x10, 11:x11, 12:x12, 13:x13 ,14:x14, 15:x15,
574                16:x16, 17:x17 , 18:x18, 19:x19, 20:x20, 21:x21 ,22:x22, 23:x23,
575                24:x24, 25:x25 , 26:x26, 27:x27, 28:x28, 29:x29 ,30:x30, 31:x31;
576}
577
// Generates the API of the mask ("bool") vector types.
//
// Each entry has the form `Mask:lane_type, lane_count, 0:x0, 1:x1, ...;`.
// A lane is considered true when it is non-zero.
macro_rules! bool_impls {
    ($(
        $name: ident : $elem: ident, 
        $length: expr,
        $($index:tt : $field:ident),*;
        )*) => {
        
        $(impl $name {
            /// Create new instance from one value per lane
            #[inline(always)]
            pub fn new($($field: $elem),*) -> Self {
                $name($($field),*)
            }

            /// Create new instance with all lanes set to a value
            #[inline(always)]
            pub fn splat(x: $elem) -> Self {
                Self {$($index : x),*}
            }
            
            /// Get the `idx`th lane value
            ///
            /// # Panics
            ///
            /// Panics if `idx` is not smaller than the number of lanes.
            #[inline(always)]
            pub fn extract(self, idx: usize) -> $elem {
                assert!(idx < $length);
                // The struct is `#[repr(packed)]` (alignment 1), so reading a
                // lane through a `*const $elem` would be an unaligned access.
                // Copy the lanes by value into a properly aligned array.
                let lanes = [$(self.$index),*];
                lanes[idx]
            }
            
            /// Replace the `idx`th lane with new value
            ///
            /// Returns the modified copy.
            ///
            /// # Panics
            ///
            /// Panics if `idx` is not smaller than the number of lanes.
            #[inline(always)]
            pub fn replace(self, idx: usize, elem: $elem) -> Self {
                assert!(idx < $length);
                // Same reasoning as in `extract`: edit an aligned copy of the
                // lanes instead of writing through a possibly unaligned
                // pointer.
                let mut lanes = [$(self.$index),*];
                lanes[idx] = elem;
                $name($(lanes[$index]),*)
            }

            impl_load_store!{$name: $elem, $($index),*}

            /// Check if all lanes are true
            #[inline(always)]
            pub fn all(self) -> bool {
                $((self.$index != 0)) && *
            }


            /// Check if at least one lane is true
            #[inline(always)]
            pub fn any(self) -> bool {
                $((self.$index != 0)) || *
            }            
       })*
       
       $(
       /// Not trait (!)
       ///
       /// Logical inversion per lane: every true (non-zero) lane becomes 0
       /// and every false lane becomes all ones.
       impl Not for $name {
            type Output = Self;

            #[inline(always)]
            fn not(self) -> Self {
              // A bitwise `!` would turn a true lane holding 1 into -2,
              // which is still true; test the lane against zero instead.
              // Lanes that were already 0 or all ones give the same result
              // as a bitwise not.
              $name($(if self.$index == 0 { !0 } else { 0 }),*)
            }
       }
       )*
    }
}
647
// Instantiates the mask API for every mask vector type.
// Row layout: `Mask:lane, lane_count, index:ctor_arg, ...;`
bool_impls! {
    bool32x2:i32, 2,    0:x0, 1:x1;
    bool32x4:i32, 4,    0:x0, 1:x1 , 2:x2, 3:x3;
     bool8x4:i8 , 4,    0:x0, 1:x1 , 2:x2, 3:x3;
    bool16x4:i16, 4,    0:x0, 1:x1 , 2:x2, 3:x3;

     bool8x8:i8 , 8,    0:x0, 1:x1 , 2:x2, 3:x3, 4:x4, 5:x5, 6:x6, 7:x7;
    bool16x8:i16, 8,    0:x0, 1:x1 , 2:x2, 3:x3, 4:x4, 5:x5, 6:x6, 7:x7;

    bool8x16:i8, 16,    0:x0, 1:x1 , 2:x2, 3:x3, 4:x4, 5:x5, 6:x6, 7:x7,
                        8:x8, 9:x9 , 10:x10, 11:x11, 12:x12, 13:x13 ,14:x14, 15:x15;
                    
    bool64x2:i64, 2,    0:x0, 1:x1;
    bool64x4:i64, 4,    0:x0, 1:x1 , 2:x2, 3:x3;
    bool32x8:i32, 8,    0:x0, 1:x1 , 2:x2, 3:x3, 4:x4, 5:x5, 6:x6, 7:x7;
    
    bool16x16:i16, 16,  0:x0, 1:x1 , 2:x2, 3:x3, 4:x4, 5:x5, 6:x6, 7:x7,
                        8:x8, 9:x9 , 10:x10, 11:x11, 12:x12, 13:x13 ,14:x14, 15:x15;
                    
    bool8x32:i8, 32,    0:x0, 1:x1 , 2:x2, 3:x3, 4:x4, 5:x5, 6:x6, 7:x7,
                        8:x8, 9:x9 , 10:x10, 11:x11, 12:x12, 13:x13 ,14:x14, 15:x15,
                        16:x16, 17:x17 , 18:x18, 19:x19, 20:x20, 21:x21 ,22:x22, 23:x23,
                        24:x24, 25:x25 , 26:x26, 27:x27, 28:x28, 29:x29 ,30:x30, 31:x31;
}
672
// Generates the floating-point helpers and the four arithmetic operators for
// the float vector types.
//
// Each entry has the form `Vector, 0:x0, 1:x1, ...;`. Only the field indices
// of the `index:name` pairs are used here.
macro_rules! float_impls {
    ($(
        $name: ident,
        $($index:tt : $field:ident),*;
    )*) => {
        $(
            impl $name {
                /// Square root of every lane.
                #[inline]
                pub fn sqrt(self) -> Self {
                    Self($(self.$index.sqrt()),*)
                }

                /// Reciprocal of the square root of every lane.
                ///
                /// Despite the name, this is computed as `1 / sqrt(x)`, with
                /// no reduced-precision shortcut.
                #[inline]
                pub fn approx_rsqrt(self) -> Self {
                    Self($(self.$index.sqrt().recip()),*)
                }

                /// Reciprocal of every lane.
                ///
                /// Despite the name, this is computed as `1 / x`, with no
                /// reduced-precision shortcut.
                #[inline]
                pub fn approx_reciprocal(self) -> Self {
                    Self($(self.$index.recip()),*)
                }
            }

            /// Lane-wise floating-point addition (`+`).
            impl Add for $name {
                type Output = Self;

                #[inline(always)]
                fn add(self, rhs: Self) -> Self {
                    Self($(self.$index + rhs.$index),*)
                }
            }

            /// Lane-wise floating-point subtraction (`-`).
            impl Sub for $name {
                type Output = Self;

                #[inline(always)]
                fn sub(self, rhs: Self) -> Self {
                    Self($(self.$index - rhs.$index),*)
                }
            }

            /// Lane-wise floating-point multiplication (`*`).
            impl Mul for $name {
                type Output = Self;

                #[inline(always)]
                fn mul(self, rhs: Self) -> Self {
                    Self($(self.$index * rhs.$index),*)
                }
            }

            /// Lane-wise floating-point division (`/`).
            impl Div for $name {
                type Output = Self;

                #[inline(always)]
                fn div(self, rhs: Self) -> Self {
                    Self($(self.$index / rhs.$index),*)
                }
            }
        )*
    }
}
741
// Instantiates the float helpers and operators for every float vector type.
// Row layout: `Vector, index:name, ...;`
float_impls! {
    f32x2, 0:x0, 1:x1;
    f32x4, 0:x0, 1:x1 , 2:x2, 3:x3;
    f32x8, 0:x0, 1:x1 , 2:x2, 3:x3, 4:x4, 5:x5 , 6:x6, 7:x7;    
    f64x2, 0:x0, 1:x1;
    f64x4, 0:x0, 1:x1 , 2:x2, 3:x3;
}
749
/// Numeric conversion from a value of type `T`.
///
/// In this file the implementations are generated by `conv_impls!`, which
/// converts vectors lane by lane with `as` casts.
pub trait FromCast<T>: Sized {
    /// Numeric cast from `T` to `Self`.
    fn from_cast(_: T) -> Self;
}
754
// Generates lane-wise conversions between vector types.
//
// Each entry has the form
// `method, Source : source_lane -> Target : target_lane, 0:x0, 1:x1, ...;`
// and produces an inherent `Source::method`, plus `From<Source>` and
// `FromCast<Source>` impls on `Target` that both forward to that method.
macro_rules! conv_impls {
    ($(
        $cvt: ident,
        $from_name: ident : $from_elem: ident -> $to_name : ident : $to_elem : ident,
        $($index:tt : $field:ident),*;
    )*) => {
        $(
            impl $from_name {
                /// Converts every lane with an `as` cast to the lane type of
                /// the target vector.
                #[inline(always)]
                pub fn $cvt(self) -> $to_name {
                    let lanes = self;
                    $to_name($(lanes.$index as $to_elem),*)
                }
            }

            impl From<$from_name> for $to_name {
                /// Warning ! The conversion can be lossy
                #[inline(always)]
                fn from(source: $from_name) -> Self {
                    $from_name::$cvt(source)
                }
            }

            impl FromCast<$from_name> for $to_name {
                /// Same lane-wise `as` conversion as the `From` impl.
                #[inline(always)]
                fn from_cast(source: $from_name) -> Self {
                    $from_name::$cvt(source)
                }
            }
        )*
    }
}
787
// Conversion among types
// Row layout: `method, Source : source_lane -> Target : target_lane, index:name, ...;`
// Every conversion is a lane-wise `as` cast.
conv_impls! {
    to_i, u32x2 : u32 -> i32x2 : i32,       0:x0, 1:x1;
    to_i, f32x2 : f32 -> i32x2 : i32,       0:x0, 1:x1;    
    to_u, i32x2 : i32 -> u32x2 : u32,       0:x0, 1:x1;
    to_u, f32x2 : f32 -> u32x2 : u32,       0:x0, 1:x1;    
    to_f, u32x2 : u32 -> f32x2 : f32,       0:x0, 1:x1;
    to_f, i32x2 : i32 -> f32x2 : f32,       0:x0, 1:x1;
    to_i, bool32x2 : i32 -> i32x2 : i32,    0:x0, 1:x1;
    to_u, bool32x2 : i32 -> u32x2 : u32,    0:x0, 1:x1;
    
    to_i, u32x4 : u32 -> i32x4 : i32,       0:x0, 1:x1, 2:x2, 3:x3;
    to_i, f32x4 : f32 -> i32x4 : i32,       0:x0, 1:x1, 2:x2, 3:x3;    
    to_u, i32x4 : i32 -> u32x4 : u32,       0:x0, 1:x1, 2:x2, 3:x3;
    to_u, f32x4 : f32 -> u32x4 : u32,       0:x0, 1:x1, 2:x2, 3:x3;    
    to_f, u32x4 : u32 -> f32x4 : f32,       0:x0, 1:x1, 2:x2, 3:x3;
    to_f, i32x4 : i32 -> f32x4 : f32,       0:x0, 1:x1, 2:x2, 3:x3;
    to_i, bool32x4 : i32 -> i32x4 : i32,    0:x0, 1:x1, 2:x2, 3:x3;
    to_u, bool32x4 : i32 -> u32x4 : u32,    0:x0, 1:x1, 2:x2, 3:x3;
    
    to_i, u32x8 : u32 -> i32x8 : i32,       0:x0, 1:x1, 2:x2, 3:x3, 4:x4, 5:x5, 6:x6, 7:x7;
    to_i, f32x8 : f32 -> i32x8 : i32,       0:x0, 1:x1, 2:x2, 3:x3, 4:x4, 5:x5, 6:x6, 7:x7;    
    to_u, i32x8 : i32 -> u32x8 : u32,       0:x0, 1:x1, 2:x2, 3:x3, 4:x4, 5:x5, 6:x6, 7:x7;
    to_u, f32x8 : f32 -> u32x8 : u32,       0:x0, 1:x1, 2:x2, 3:x3, 4:x4, 5:x5, 6:x6, 7:x7;    
    to_f, u32x8 : u32 -> f32x8 : f32,       0:x0, 1:x1, 2:x2, 3:x3, 4:x4, 5:x5, 6:x6, 7:x7;
    to_f, i32x8 : i32 -> f32x8 : f32,       0:x0, 1:x1, 2:x2, 3:x3, 4:x4, 5:x5, 6:x6, 7:x7;
    to_i, bool32x8 : i32 -> i32x8 : i32,    0:x0, 1:x1, 2:x2, 3:x3, 4:x4, 5:x5, 6:x6, 7:x7;
    to_u, bool32x8 : i32 -> u32x8 : u32,    0:x0, 1:x1, 2:x2, 3:x3, 4:x4, 5:x5, 6:x6, 7:x7;
    
    to_i, u64x2 : u64 -> i64x2 : i64,       0:x0, 1:x1;
    to_i, f64x2 : f64 -> i64x2 : i64,       0:x0, 1:x1;    
    to_u, i64x2 : i64 -> u64x2 : u64,       0:x0, 1:x1;
    to_u, f64x2 : f64 -> u64x2 : u64,       0:x0, 1:x1;
    to_f, u64x2 : u64 -> f64x2 : f64,       0:x0, 1:x1;
    to_f, i64x2 : i64 -> f64x2 : f64,       0:x0, 1:x1;
    to_i, bool64x2 : i64 -> i64x2 : i64,    0:x0, 1:x1;
    to_u, bool64x2 : i64 -> u64x2 : u64,    0:x0, 1:x1;
    
    to_i, u64x4 : u64 -> i64x4 : i64,       0:x0, 1:x1, 2:x2, 3:x3;
    to_i, f64x4 : f64 -> i64x4 : i64,       0:x0, 1:x1, 2:x2, 3:x3;    
    to_u, i64x4 : i64 -> u64x4 : u64,       0:x0, 1:x1, 2:x2, 3:x3;
    to_u, f64x4 : f64 -> u64x4 : u64,       0:x0, 1:x1, 2:x2, 3:x3;    
    to_f, u64x4 : u64 -> f64x4 : f64,       0:x0, 1:x1, 2:x2, 3:x3;
    to_f, i64x4 : i64 -> f64x4 : f64,       0:x0, 1:x1, 2:x2, 3:x3;
    to_i, bool64x4 : i64 -> i64x4 : i64,    0:x0, 1:x1, 2:x2, 3:x3;
    to_u, bool64x4 : i64 -> u64x4 : u64,    0:x0, 1:x1, 2:x2, 3:x3;
    
    // Narrowing / widening between 32-bit and 64-bit lanes.
    to_i32, i64x2 : i64 -> i32x2 : i32,     0:x0, 1:x1;
    to_i64, i32x2 : i32 -> i64x2 : i64,     0:x0, 1:x1;    
    to_i32, i64x4 : i64 -> i32x4 : i32,     0:x0, 1:x1, 2:x2, 3:x3;
    to_i64, i32x4 : i32 -> i64x4 : i64,     0:x0, 1:x1, 2:x2, 3:x3;
    
    to_u32, u64x2 : u64 -> u32x2 : u32,     0:x0, 1:x1;
    to_u64, u32x2 : u32 -> u64x2 : u64,     0:x0, 1:x1;    
    to_u32, u64x4 : u64 -> u32x4 : u32,     0:x0, 1:x1, 2:x2, 3:x3;
    to_u64, u32x4 : u32 -> u64x4 : u64,     0:x0, 1:x1, 2:x2, 3:x3;
    
    to_f32, f64x2 : f64 -> f32x2 : f32,     0:x0, 1:x1;
    to_f64, f32x2 : f32 -> f64x2 : f64,     0:x0, 1:x1;    
    to_f32, f64x4 : f64 -> f32x4 : f32,     0:x0, 1:x1, 2:x2, 3:x3;
    to_f64, f32x4 : f32 -> f64x4 : f64,     0:x0, 1:x1, 2:x2, 3:x3;

    // Widening from 16-bit lanes to 32-bit lanes.
    to_i32, i16x8 : i16 -> i32x8 : i32,     0:x0, 1:x1, 2:x2, 3:x3, 4:x4, 5:x5, 6:x6, 7:x7;
    to_i32, u16x8 : u16 -> i32x8 : i32,     0:x0, 1:x1, 2:x2, 3:x3, 4:x4, 5:x5, 6:x6, 7:x7;
    to_i32, i16x4 : i16 -> i32x4 : i32,     0:x0, 1:x1, 2:x2, 3:x3;
    to_i32, u16x4 : u16 -> i32x4 : i32,     0:x0, 1:x1, 2:x2, 3:x3;

    // Narrowing to 8-bit lanes.
    to_u8, i32x8 : i32 -> u8x8 : u8,     0:x0, 1:x1, 2:x2, 3:x3, 4:x4, 5:x5, 6:x6, 7:x7;
    to_u8, u32x8 : u32 -> u8x8 : u8,     0:x0, 1:x1, 2:x2, 3:x3, 4:x4, 5:x5, 6:x6, 7:x7;
    to_u8, i16x8 : i16 -> u8x8 : u8,     0:x0, 1:x1, 2:x2, 3:x3, 4:x4, 5:x5, 6:x6, 7:x7;
    to_u8, u16x8 : u16 -> u8x8 : u8,     0:x0, 1:x1, 2:x2, 3:x3, 4:x4, 5:x5, 6:x6, 7:x7;

    to_u8, i32x4 : i32 -> u8x4 : u8,     0:x0, 1:x1, 2:x2, 3:x3;
    to_u8, u32x4 : u32 -> u8x4 : u8,     0:x0, 1:x1, 2:x2, 3:x3;
    to_u8, i16x4 : i16 -> u8x4 : u8,     0:x0, 1:x1, 2:x2, 3:x3;
    to_u8, u16x4 : u16 -> u8x4 : u8,     0:x0, 1:x1, 2:x2, 3:x3;
}