Skip to main content

rill_core/math/vector/simd/
x86.rs

1//! # x86/x86_64 SIMD реализации
2//!
3//! Использует SSE2, SSE4.1, AVX, AVX2 и AVX512 инструкции для векторных операций.
4
5#![cfg(any(target_arch = "x86", target_arch = "x86_64"))]
6#![allow(unused_imports)]
7#![allow(dead_code)]
8
9use super::super::traits::*;
10use crate::Transcendental;
11
12// -----------------------------------------------------------------------------
13// SIMD типы
14// -----------------------------------------------------------------------------
15
/// Four `f32` lanes, sized to match a 128-bit SSE register.
///
/// The lanes live in a plain `[f32; 4]`; `#[repr(transparent)]` gives the
/// wrapper the same layout as that array.
#[repr(transparent)]
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct F32x4([f32; 4]);
20
/// Eight `f32` lanes, sized to match a 256-bit AVX register.
///
/// The lanes live in a plain `[f32; 8]`; `#[repr(transparent)]` gives the
/// wrapper the same layout as that array.
#[repr(transparent)]
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct F32x8([f32; 8]);
25
/// Sixteen `f32` lanes, sized to match a 512-bit AVX-512 register.
///
/// The lanes live in a plain `[f32; 16]`; `#[repr(transparent)]` gives the
/// wrapper the same layout as that array.
#[repr(transparent)]
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct F32x16([f32; 16]);
30
/// Two `f64` lanes, sized to match a 128-bit SSE2 register.
///
/// The lanes live in a plain `[f64; 2]`; `#[repr(transparent)]` gives the
/// wrapper the same layout as that array.
#[repr(transparent)]
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct F64x2([f64; 2]);
35
/// Four `f64` lanes, sized to match a 256-bit AVX register.
///
/// The lanes live in a plain `[f64; 4]`; `#[repr(transparent)]` gives the
/// wrapper the same layout as that array.
#[repr(transparent)]
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct F64x4([f64; 4]);
40
/// Eight `f64` lanes, sized to match a 512-bit AVX-512 register.
///
/// The lanes live in a plain `[f64; 8]`; `#[repr(transparent)]` gives the
/// wrapper the same layout as that array.
#[repr(transparent)]
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct F64x8([f64; 8]);
45
46// -----------------------------------------------------------------------------
47// Реализация Vector для F32x4
48// -----------------------------------------------------------------------------
49
50impl Vector<f32, 4> for F32x4 {
51    fn splat(value: f32) -> Self {
52        F32x4([value; 4])
53    }
54
55    fn load(slice: &[f32]) -> Self {
56        let mut arr = [0.0; 4];
57        arr.copy_from_slice(&slice[0..4]);
58        F32x4(arr)
59    }
60
61    fn store(&self, slice: &mut [f32]) {
62        slice[0..4].copy_from_slice(&self.0);
63    }
64
65    fn extract(&self, index: usize) -> f32 {
66        self.0[index]
67    }
68
69    fn insert(&self, index: usize, value: f32) -> Self {
70        let mut arr = self.0;
71        arr[index] = value;
72        F32x4(arr)
73    }
74
75    fn add(&self, other: &Self) -> Self {
76        let mut arr = [0.0; 4];
77        for i in 0..4 {
78            arr[i] = self.0[i] + other.0[i];
79        }
80        F32x4(arr)
81    }
82
83    fn sub(&self, other: &Self) -> Self {
84        let mut arr = [0.0; 4];
85        for i in 0..4 {
86            arr[i] = self.0[i] - other.0[i];
87        }
88        F32x4(arr)
89    }
90
91    fn mul(&self, other: &Self) -> Self {
92        let mut arr = [0.0; 4];
93        for i in 0..4 {
94            arr[i] = self.0[i] * other.0[i];
95        }
96        F32x4(arr)
97    }
98
99    fn div(&self, other: &Self) -> Self {
100        let mut arr = [0.0; 4];
101        for i in 0..4 {
102            arr[i] = self.0[i] / other.0[i];
103        }
104        F32x4(arr)
105    }
106
107    fn rem(&self, other: &Self) -> Self {
108        let mut arr = [0.0; 4];
109        for i in 0..4 {
110            arr[i] = self.0[i] % other.0[i];
111        }
112        F32x4(arr)
113    }
114
115    fn neg(&self) -> Self {
116        let mut arr = [0.0; 4];
117        for i in 0..4 {
118            arr[i] = -self.0[i];
119        }
120        F32x4(arr)
121    }
122
123    fn abs(&self) -> Self {
124        let mut arr = [0.0; 4];
125        for i in 0..4 {
126            arr[i] = self.0[i].abs();
127        }
128        F32x4(arr)
129    }
130
131    fn min(&self, other: &Self) -> Self {
132        let mut arr = [0.0; 4];
133        for i in 0..4 {
134            arr[i] = self.0[i].min(other.0[i]);
135        }
136        F32x4(arr)
137    }
138
139    fn max(&self, other: &Self) -> Self {
140        let mut arr = [0.0; 4];
141        for i in 0..4 {
142            arr[i] = self.0[i].max(other.0[i]);
143        }
144        F32x4(arr)
145    }
146
147    fn clamp(&self, min: &Self, max: &Self) -> Self {
148        let mut arr = [0.0; 4];
149        for i in 0..4 {
150            arr[i] = self.0[i].clamp(min.0[i], max.0[i]);
151        }
152        F32x4(arr)
153    }
154
155}
156
157impl VectorTranscendental<f32, 4> for F32x4 {
158    fn sqrt(&self) -> Self { let mut a = [0.0; 4]; for i in 0..4 { a[i] = self.0[i].sqrt(); } F32x4(a) }
159    fn exp(&self) -> Self { let mut a = [0.0; 4]; for i in 0..4 { a[i] = self.0[i].exp(); } F32x4(a) }
160    fn ln(&self) -> Self { let mut a = [0.0; 4]; for i in 0..4 { a[i] = self.0[i].ln(); } F32x4(a) }
161    fn sin(&self) -> Self { let mut a = [0.0; 4]; for i in 0..4 { a[i] = self.0[i].sin(); } F32x4(a) }
162    fn cos(&self) -> Self { let mut a = [0.0; 4]; for i in 0..4 { a[i] = self.0[i].cos(); } F32x4(a) }
163    fn tan(&self) -> Self { let mut a = [0.0; 4]; for i in 0..4 { a[i] = self.0[i].tan(); } F32x4(a) }
164}
165
166// Пока реализуем остальные типы как заглушки (скалярные версии)
167// В будущем здесь будут настоящие SIMD инструкции через core::arch::x86_64
168
169use std::ops::{Add, Div, Mul, Neg, Rem, Sub};
170
171impl Add for F32x4 {
172    type Output = Self;
173
174    fn add(self, rhs: Self) -> Self {
175        let mut arr = [0.0; 4];
176        for i in 0..4 {
177            arr[i] = self.0[i] + rhs.0[i];
178        }
179        F32x4(arr)
180    }
181}
182
183impl Sub for F32x4 {
184    type Output = Self;
185
186    fn sub(self, rhs: Self) -> Self {
187        let mut arr = [0.0; 4];
188        for i in 0..4 {
189            arr[i] = self.0[i] - rhs.0[i];
190        }
191        F32x4(arr)
192    }
193}
194
195impl Mul for F32x4 {
196    type Output = Self;
197
198    fn mul(self, rhs: Self) -> Self {
199        let mut arr = [0.0; 4];
200        for i in 0..4 {
201            arr[i] = self.0[i] * rhs.0[i];
202        }
203        F32x4(arr)
204    }
205}
206
207impl Div for F32x4 {
208    type Output = Self;
209
210    fn div(self, rhs: Self) -> Self {
211        let mut arr = [0.0; 4];
212        for i in 0..4 {
213            arr[i] = self.0[i] / rhs.0[i];
214        }
215        F32x4(arr)
216    }
217}
218
219impl Rem for F32x4 {
220    type Output = Self;
221
222    fn rem(self, rhs: Self) -> Self {
223        let mut arr = [0.0; 4];
224        for i in 0..4 {
225            arr[i] = self.0[i] % rhs.0[i];
226        }
227        F32x4(arr)
228    }
229}
230
231impl Neg for F32x4 {
232    type Output = Self;
233
234    fn neg(self) -> Self {
235        let mut arr = [0.0; 4];
236        for i in 0..4 {
237            arr[i] = -self.0[i];
238        }
239        F32x4(arr)
240    }
241}
242
243impl Default for F32x4 {
244    fn default() -> Self {
245        F32x4([0.0; 4])
246    }
247}
248
249// -----------------------------------------------------------------------------
250// Тесты
251// -----------------------------------------------------------------------------
252
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that every lane of `actual` equals the matching entry of
    /// `expected`, reporting the offending lane on failure.
    fn assert_lanes(actual: F32x4, expected: [f32; 4]) {
        for (lane, want) in expected.iter().enumerate() {
            assert_eq!(actual.extract(lane), *want, "lane {}", lane);
        }
    }

    /// `splat` must broadcast the value into all four lanes.
    #[test]
    fn test_f32x4_splat() {
        let v = F32x4::splat(2.5);
        assert_lanes(v, [2.5; 4]);
    }

    /// `+` must add matching lanes; distinct per-lane inputs expose any
    /// lane mix-up that uniform inputs would hide.
    #[test]
    fn test_f32x4_add() {
        let a = F32x4([1.0, 2.0, 3.0, 4.0]);
        let b = F32x4([10.0, 20.0, 30.0, 40.0]);
        let c = a + b;
        assert_lanes(c, [11.0, 22.0, 33.0, 44.0]);
    }

    /// `*` must multiply matching lanes.
    #[test]
    fn test_f32x4_mul() {
        let a = F32x4([1.0, 2.0, 3.0, 4.0]);
        let b = F32x4([2.0, 3.0, 4.0, 5.0]);
        let c = a * b;
        assert_lanes(c, [2.0, 6.0, 12.0, 20.0]);
    }

    /// `sin(0)` is exactly zero in every lane.
    #[test]
    fn test_f32x4_sin() {
        let a = F32x4::splat(0.0);
        let b = a.sin();
        assert_lanes(b, [0.0; 4]);
    }
}
286}