sti/simd/
mod.rs

1use crate::mem::PhantomData;
2
3
4/// Simd<T, N>
5/// - single instruction, multiple data.
6/// - implemented as an aligned array.
7#[derive(Clone, Copy)]
8#[repr(transparent)]
9pub struct Simd<T: SimdElement, const N: usize> where (): SimdLanes<N> {
10    v: <() as SimdLanes<N>>::Repr,
11    p: PhantomData<fn(T) -> T>
12}
13
14mod generic;
15
16
17
/// SimdElement
/// - the trait for supported simd element types.
///
/// Implementors must be `Copy`. The two associated functions take a small
/// array of `Self` (2 or 4 elements) and return a `u32` array of the same
/// length.
///
/// # Safety
///
/// The contract an implementor must uphold is not spelled out in this file.
/// NOTE(review): presumably `Self` has to be a 32-bit type whose bits may be
/// reinterpreted as `u32` — confirm against the existing implementations
/// before adding a new one.
pub unsafe trait SimdElement: Copy {
    /// Converts two elements into two `u32` values.
    fn se_to_u32x2(v: [Self; 2]) -> [u32; 2];

    /// Converts four elements into four `u32` values.
    fn se_to_u32x4(v: [Self; 4]) -> [u32; 4];
}
24
25mod b32x; pub use b32x::*;
26mod i32x; pub use i32x::*;
27mod u32x; pub use u32x::*;
28mod f32x; pub use f32x::*;
29
30
31
32//
33// SimdLanes
34//  - the trait implemented by each platform.
35//
36
37pub trait SimdLanes<const N: usize> {
38    type Repr: Copy;
39
40    fn repr_from_se<T: SimdElement>(v: [T; N]) -> Self::Repr;
41
42    fn repr_zip(lhs: Self::Repr, rhs: Self::Repr) -> (Self::Repr, Self::Repr);
43    fn repr_unzip(lhs: Self::Repr, rhs: Self::Repr) -> (Self::Repr, Self::Repr);
44
45
46    fn b32_splat(v: B32) -> Self::Repr;
47
48    fn b32_select(mask: Self::Repr, on_true: Self::Repr, on_false: Self::Repr) -> Self::Repr;
49
50    fn b32_none(v: Self::Repr) -> bool;
51    fn b32_any(v: Self::Repr) -> bool;
52    fn b32_all(v: Self::Repr) -> bool;
53
54    fn b32_and(lhs: Self::Repr, rhs: Self::Repr) -> Self::Repr;
55    fn b32_or(lhs: Self::Repr, rhs: Self::Repr) -> Self::Repr;
56    fn b32_not(v: Self::Repr) -> Self::Repr;
57
58
59    fn i32_splat(v: i32) -> Self::Repr;
60
61    fn i32_to_f32(v: Self::Repr) -> Self::Repr;
62
63    fn i32_min(lhs: Self::Repr, rhs: Self::Repr) -> Self::Repr;
64    fn i32_max(lhs: Self::Repr, rhs: Self::Repr) -> Self::Repr;
65
66    fn i32_eq(lhs: Self::Repr, rhs: Self::Repr) -> Self::Repr;
67    fn i32_ne(lhs: Self::Repr, rhs: Self::Repr) -> Self::Repr;
68    fn i32_le(lhs: Self::Repr, rhs: Self::Repr) -> Self::Repr;
69    fn i32_lt(lhs: Self::Repr, rhs: Self::Repr) -> Self::Repr;
70    fn i32_ge(lhs: Self::Repr, rhs: Self::Repr) -> Self::Repr;
71    fn i32_gt(lhs: Self::Repr, rhs: Self::Repr) -> Self::Repr;
72
73    fn i32_add(lhs: Self::Repr, rhs: Self::Repr) -> Self::Repr;
74    fn i32_sub(lhs: Self::Repr, rhs: Self::Repr) -> Self::Repr;
75    fn i32_neg(v: Self::Repr) -> Self::Repr;
76
77    fn i32_shl(v: Self::Repr, shift: i32) -> Self::Repr;
78    fn i32_shr(v: Self::Repr, shift: i32) -> Self::Repr;
79
80
81    fn u32_splat(v: u32) -> Self::Repr;
82
83    fn u32_as_i32(v: Self::Repr) -> Self::Repr;
84
85    fn u32_min(lhs: Self::Repr, rhs: Self::Repr) -> Self::Repr;
86    fn u32_max(lhs: Self::Repr, rhs: Self::Repr) -> Self::Repr;
87
88    fn u32_le(lhs: Self::Repr, rhs: Self::Repr) -> Self::Repr;
89    fn u32_lt(lhs: Self::Repr, rhs: Self::Repr) -> Self::Repr;
90    fn u32_ge(lhs: Self::Repr, rhs: Self::Repr) -> Self::Repr;
91    fn u32_gt(lhs: Self::Repr, rhs: Self::Repr) -> Self::Repr;
92
93    fn u32_shr(v: Self::Repr, shift: u32) -> Self::Repr;
94
95    fn u32_and(lhs: Self::Repr, rhs: Self::Repr) -> Self::Repr;
96    fn u32_or(lhs: Self::Repr, rhs: Self::Repr) -> Self::Repr;
97    fn u32_not(v: Self::Repr) -> Self::Repr;
98
99
100    fn f32_splat(v: f32) -> Self::Repr;
101
102    fn f32_to_i32_unck(v: Self::Repr) -> Self::Repr;
103    fn f32_to_i32(v: Self::Repr) -> Self::Repr;
104
105    fn f32_floor(v: Self::Repr) -> Self::Repr;
106    fn f32_ceil(v: Self::Repr) -> Self::Repr;
107    fn f32_round(v: Self::Repr) -> Self::Repr;
108    fn f32_trunc(v: Self::Repr) -> Self::Repr;
109    fn f32_abs(v: Self::Repr) -> Self::Repr;
110    fn f32_sqrt(v: Self::Repr) -> Self::Repr;
111    fn f32_with_sign_of(v: Self::Repr, sign: Self::Repr) -> Self::Repr;
112
113    fn f32_hadd(v: Self::Repr) -> f32;
114
115    fn f32_min(lhs: Self::Repr, rhs: Self::Repr) -> Self::Repr;
116    fn f32_max(lhs: Self::Repr, rhs: Self::Repr) -> Self::Repr;
117
118    fn f32_eq(lhs: Self::Repr, rhs: Self::Repr) -> Self::Repr;
119    fn f32_ne(lhs: Self::Repr, rhs: Self::Repr) -> Self::Repr;
120    fn f32_le(lhs: Self::Repr, rhs: Self::Repr) -> Self::Repr;
121    fn f32_lt(lhs: Self::Repr, rhs: Self::Repr) -> Self::Repr;
122    fn f32_ge(lhs: Self::Repr, rhs: Self::Repr) -> Self::Repr;
123    fn f32_gt(lhs: Self::Repr, rhs: Self::Repr) -> Self::Repr;
124
125    fn f32_neg(v: Self::Repr) -> Self::Repr;
126    fn f32_add(lhs: Self::Repr, rhs: Self::Repr) -> Self::Repr;
127    fn f32_sub(lhs: Self::Repr, rhs: Self::Repr) -> Self::Repr;
128    fn f32_mul(lhs: Self::Repr, rhs: Self::Repr) -> Self::Repr;
129    fn f32_div(lhs: Self::Repr, rhs: Self::Repr) -> Self::Repr;
130}
131
132mod scalar;
133
134
135mod tests;
136
137