Skip to main content

rill_core/math/vector/simd/
x86.rs

//! # x86/x86_64 SIMD implementations
//!
//! Intended to use SSE2, SSE4.1, AVX, AVX2 and AVX-512 instructions for vector operations.
//! The implementations currently in this file are scalar, lane-by-lane fallbacks.
4
5#![cfg(any(target_arch = "x86", target_arch = "x86_64"))]
6#![allow(unused_imports)]
7#![allow(dead_code)]
8
9use super::super::traits::*;
10use crate::Transcendental;
11
12// -----------------------------------------------------------------------------
13// SIMD типы
14// -----------------------------------------------------------------------------
15
/// Four-lane `f32` vector, matching the lane count of a 128-bit SSE register.
///
/// Stored as a plain `[f32; 4]`; `#[repr(transparent)]` gives the wrapper the
/// same layout as that array.
#[repr(transparent)]
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct F32x4([f32; 4]);
20
/// Eight-lane `f32` vector, matching the lane count of a 256-bit AVX register.
///
/// Stored as a plain `[f32; 8]`; `#[repr(transparent)]` gives the wrapper the
/// same layout as that array.
#[repr(transparent)]
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct F32x8([f32; 8]);
25
/// Sixteen-lane `f32` vector, matching the lane count of a 512-bit AVX-512 register.
///
/// Stored as a plain `[f32; 16]`; `#[repr(transparent)]` gives the wrapper the
/// same layout as that array.
#[repr(transparent)]
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct F32x16([f32; 16]);
30
/// Two-lane `f64` vector, matching the lane count of a 128-bit SSE2 register.
///
/// Stored as a plain `[f64; 2]`; `#[repr(transparent)]` gives the wrapper the
/// same layout as that array.
#[repr(transparent)]
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct F64x2([f64; 2]);
35
/// Four-lane `f64` vector, matching the lane count of a 256-bit AVX register.
///
/// Stored as a plain `[f64; 4]`; `#[repr(transparent)]` gives the wrapper the
/// same layout as that array.
#[repr(transparent)]
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct F64x4([f64; 4]);
40
/// Eight-lane `f64` vector, matching the lane count of a 512-bit AVX-512 register.
///
/// Stored as a plain `[f64; 8]`; `#[repr(transparent)]` gives the wrapper the
/// same layout as that array.
#[repr(transparent)]
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct F64x8([f64; 8]);
45
46// -----------------------------------------------------------------------------
47// Реализация Vector для F32x4
48// -----------------------------------------------------------------------------
49
50impl Vector<f32, 4> for F32x4 {
51    fn splat(value: f32) -> Self {
52        F32x4([value; 4])
53    }
54
55    fn load(slice: &[f32]) -> Self {
56        let mut arr = [0.0; 4];
57        arr.copy_from_slice(&slice[0..4]);
58        F32x4(arr)
59    }
60
61    fn store(&self, slice: &mut [f32]) {
62        slice[0..4].copy_from_slice(&self.0);
63    }
64
65    fn extract(&self, index: usize) -> f32 {
66        self.0[index]
67    }
68
69    fn insert(&self, index: usize, value: f32) -> Self {
70        let mut arr = self.0;
71        arr[index] = value;
72        F32x4(arr)
73    }
74
75    fn add(&self, other: &Self) -> Self {
76        F32x4(core::array::from_fn(|i| self.0[i] + other.0[i]))
77    }
78
79    fn sub(&self, other: &Self) -> Self {
80        F32x4(core::array::from_fn(|i| self.0[i] - other.0[i]))
81    }
82
83    fn mul(&self, other: &Self) -> Self {
84        F32x4(core::array::from_fn(|i| self.0[i] * other.0[i]))
85    }
86
87    fn div(&self, other: &Self) -> Self {
88        F32x4(core::array::from_fn(|i| self.0[i] / other.0[i]))
89    }
90
91    fn rem(&self, other: &Self) -> Self {
92        F32x4(core::array::from_fn(|i| self.0[i] % other.0[i]))
93    }
94
95    fn neg(&self) -> Self {
96        F32x4(core::array::from_fn(|i| -self.0[i]))
97    }
98
99    fn abs(&self) -> Self {
100        F32x4(core::array::from_fn(|i| self.0[i].abs()))
101    }
102
103    fn min(&self, other: &Self) -> Self {
104        F32x4(core::array::from_fn(|i| self.0[i].min(other.0[i])))
105    }
106
107    fn max(&self, other: &Self) -> Self {
108        F32x4(core::array::from_fn(|i| self.0[i].max(other.0[i])))
109    }
110
111    fn clamp(&self, min: &Self, max: &Self) -> Self {
112        F32x4(core::array::from_fn(|i| {
113            self.0[i].clamp(min.0[i], max.0[i])
114        }))
115    }
116}
117
118impl VectorTranscendental<f32, 4> for F32x4 {
119    fn sqrt(&self) -> Self {
120        F32x4(core::array::from_fn(|i| self.0[i].sqrt()))
121    }
122    fn exp(&self) -> Self {
123        F32x4(core::array::from_fn(|i| self.0[i].exp()))
124    }
125    fn ln(&self) -> Self {
126        F32x4(core::array::from_fn(|i| self.0[i].ln()))
127    }
128    fn sin(&self) -> Self {
129        F32x4(core::array::from_fn(|i| self.0[i].sin()))
130    }
131    fn cos(&self) -> Self {
132        F32x4(core::array::from_fn(|i| self.0[i].cos()))
133    }
134    fn tan(&self) -> Self {
135        F32x4(core::array::from_fn(|i| self.0[i].tan()))
136    }
137}
138
// For now the remaining types are implemented as placeholders (scalar versions).
// Real SIMD instructions via core::arch::x86_64 will be added here in the future.
141
142use std::ops::{Add, Div, Mul, Neg, Rem, Sub};
143
144impl Add for F32x4 {
145    type Output = Self;
146
147    fn add(self, rhs: Self) -> Self {
148        F32x4(core::array::from_fn(|i| self.0[i] + rhs.0[i]))
149    }
150}
151
152impl Sub for F32x4 {
153    type Output = Self;
154
155    fn sub(self, rhs: Self) -> Self {
156        F32x4(core::array::from_fn(|i| self.0[i] - rhs.0[i]))
157    }
158}
159
160impl Mul for F32x4 {
161    type Output = Self;
162
163    fn mul(self, rhs: Self) -> Self {
164        F32x4(core::array::from_fn(|i| self.0[i] * rhs.0[i]))
165    }
166}
167
168impl Div for F32x4 {
169    type Output = Self;
170
171    fn div(self, rhs: Self) -> Self {
172        F32x4(core::array::from_fn(|i| self.0[i] / rhs.0[i]))
173    }
174}
175
176impl Rem for F32x4 {
177    type Output = Self;
178
179    fn rem(self, rhs: Self) -> Self {
180        F32x4(core::array::from_fn(|i| self.0[i] % rhs.0[i]))
181    }
182}
183
184impl Neg for F32x4 {
185    type Output = Self;
186
187    fn neg(self) -> Self {
188        F32x4(core::array::from_fn(|i| -self.0[i]))
189    }
190}
191
192impl Default for F32x4 {
193    fn default() -> Self {
194        F32x4([0.0; 4])
195    }
196}
197
198// -----------------------------------------------------------------------------
199// Тесты
200// -----------------------------------------------------------------------------
201
#[cfg(test)]
mod tests {
    use super::*;

    /// `splat` fills the first and the last lane with the given value.
    #[test]
    fn test_f32x4_splat() {
        let filled = F32x4::splat(2.5);
        for lane in [0, 3] {
            assert_eq!(filled.extract(lane), 2.5);
        }
    }

    /// The `+` operator adds lane 0 of both operands.
    #[test]
    fn test_f32x4_add() {
        let sum = F32x4::splat(1.0) + F32x4::splat(2.0);
        assert_eq!(sum.extract(0), 3.0);
    }

    /// The `*` operator multiplies lane 0 of both operands.
    #[test]
    fn test_f32x4_mul() {
        let product = F32x4::splat(3.0) * F32x4::splat(4.0);
        assert_eq!(product.extract(0), 12.0);
    }

    /// The sine of a zero vector is zero in lane 0.
    #[test]
    fn test_f32x4_sin() {
        let zero = F32x4::splat(0.0);
        assert_eq!(zero.sin().extract(0), 0.0);
    }
}
235}