// Source path: rill_core/math/vector/simd/x86.rs

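//! # x86/x86_64 SIMD implementations
//!
//! Vector types sized for SSE2, SSE4.1, AVX, AVX2 and AVX512 registers.
//! The operations are currently scalar fallbacks over plain arrays; they are
//! meant to be replaced by `core::arch` intrinsics later.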
4
5#![cfg(any(target_arch = "x86", target_arch = "x86_64"))]
6#![allow(unused_imports)]
7#![allow(dead_code)]
8
9use super::super::traits::*;
10use crate::Transcendental;
11
12// -----------------------------------------------------------------------------
13// SIMD types
14
/// Four `f32` lanes, sized to match one 128-bit SSE register.
///
/// The lanes are stored in a plain `[f32; 4]`. `#[repr(transparent)]` gives
/// the wrapper exactly the array's layout, so its alignment is that of `f32`
/// rather than 16 bytes.
#[repr(transparent)]
#[derive(Clone, Copy, PartialEq, Debug)]
pub struct F32x4([f32; 4]);
19
/// Eight `f32` lanes, sized to match one 256-bit AVX register.
///
/// The lanes are stored in a plain `[f32; 8]`. `#[repr(transparent)]` gives
/// the wrapper exactly the array's layout, so its alignment is that of `f32`
/// rather than 32 bytes.
#[repr(transparent)]
#[derive(Clone, Copy, PartialEq, Debug)]
pub struct F32x8([f32; 8]);
24
/// Sixteen `f32` lanes, sized to match one 512-bit AVX512 register.
///
/// The lanes are stored in a plain `[f32; 16]`. `#[repr(transparent)]` gives
/// the wrapper exactly the array's layout, so its alignment is that of `f32`
/// rather than 64 bytes.
#[repr(transparent)]
#[derive(Clone, Copy, PartialEq, Debug)]
pub struct F32x16([f32; 16]);
29
/// Two `f64` lanes, sized to match one 128-bit SSE2 register.
///
/// The lanes are stored in a plain `[f64; 2]`. `#[repr(transparent)]` gives
/// the wrapper exactly the array's layout, so its alignment is that of `f64`
/// rather than 16 bytes.
#[repr(transparent)]
#[derive(Clone, Copy, PartialEq, Debug)]
pub struct F64x2([f64; 2]);
34
/// Four `f64` lanes, sized to match one 256-bit AVX register.
///
/// The lanes are stored in a plain `[f64; 4]`. `#[repr(transparent)]` gives
/// the wrapper exactly the array's layout, so its alignment is that of `f64`
/// rather than 32 bytes.
#[repr(transparent)]
#[derive(Clone, Copy, PartialEq, Debug)]
pub struct F64x4([f64; 4]);
39
/// Eight `f64` lanes, sized to match one 512-bit AVX512 register.
///
/// The lanes are stored in a plain `[f64; 8]`. `#[repr(transparent)]` gives
/// the wrapper exactly the array's layout, so its alignment is that of `f64`
/// rather than 64 bytes.
#[repr(transparent)]
#[derive(Clone, Copy, PartialEq, Debug)]
pub struct F64x8([f64; 8]);
44
45// -----------------------------------------------------------------------------
46// Vector implementation for F32x4
47// -----------------------------------------------------------------------------
48
49impl Vector<f32, 4> for F32x4 {
50    fn splat(value: f32) -> Self {
51        F32x4([value; 4])
52    }
53
54    fn load(slice: &[f32]) -> Self {
55        let mut arr = [0.0; 4];
56        arr.copy_from_slice(&slice[0..4]);
57        F32x4(arr)
58    }
59
60    fn store(&self, slice: &mut [f32]) {
61        slice[0..4].copy_from_slice(&self.0);
62    }
63
64    fn extract(&self, index: usize) -> f32 {
65        self.0[index]
66    }
67
68    fn insert(&self, index: usize, value: f32) -> Self {
69        let mut arr = self.0;
70        arr[index] = value;
71        F32x4(arr)
72    }
73
74    fn add(&self, other: &Self) -> Self {
75        F32x4(core::array::from_fn(|i| self.0[i] + other.0[i]))
76    }
77
78    fn sub(&self, other: &Self) -> Self {
79        F32x4(core::array::from_fn(|i| self.0[i] - other.0[i]))
80    }
81
82    fn mul(&self, other: &Self) -> Self {
83        F32x4(core::array::from_fn(|i| self.0[i] * other.0[i]))
84    }
85
86    fn div(&self, other: &Self) -> Self {
87        F32x4(core::array::from_fn(|i| self.0[i] / other.0[i]))
88    }
89
90    fn rem(&self, other: &Self) -> Self {
91        F32x4(core::array::from_fn(|i| self.0[i] % other.0[i]))
92    }
93
94    fn neg(&self) -> Self {
95        F32x4(core::array::from_fn(|i| -self.0[i]))
96    }
97
98    fn abs(&self) -> Self {
99        F32x4(core::array::from_fn(|i| self.0[i].abs()))
100    }
101
102    fn min(&self, other: &Self) -> Self {
103        F32x4(core::array::from_fn(|i| self.0[i].min(other.0[i])))
104    }
105
106    fn max(&self, other: &Self) -> Self {
107        F32x4(core::array::from_fn(|i| self.0[i].max(other.0[i])))
108    }
109
110    fn clamp(&self, min: &Self, max: &Self) -> Self {
111        F32x4(core::array::from_fn(|i| {
112            self.0[i].clamp(min.0[i], max.0[i])
113        }))
114    }
115}
116
117impl VectorTranscendental<f32, 4> for F32x4 {
118    fn sqrt(&self) -> Self {
119        F32x4(core::array::from_fn(|i| self.0[i].sqrt()))
120    }
121    fn exp(&self) -> Self {
122        F32x4(core::array::from_fn(|i| self.0[i].exp()))
123    }
124    fn ln(&self) -> Self {
125        F32x4(core::array::from_fn(|i| self.0[i].ln()))
126    }
127    fn sin(&self) -> Self {
128        F32x4(core::array::from_fn(|i| self.0[i].sin()))
129    }
130    fn cos(&self) -> Self {
131        F32x4(core::array::from_fn(|i| self.0[i].cos()))
132    }
133    fn tan(&self) -> Self {
134        F32x4(core::array::from_fn(|i| self.0[i].tan()))
135    }
136}
137
// For now, everything in this file is a scalar stub: only `F32x4` has
// implementations, and they operate lane by lane on a plain array.
// In the future, real SIMD instructions via `core::arch::x86_64` will go here.
140
141use std::ops::{Add, Div, Mul, Neg, Rem, Sub};
142
143impl Add for F32x4 {
144    type Output = Self;
145
146    fn add(self, rhs: Self) -> Self {
147        F32x4(core::array::from_fn(|i| self.0[i] + rhs.0[i]))
148    }
149}
150
151impl Sub for F32x4 {
152    type Output = Self;
153
154    fn sub(self, rhs: Self) -> Self {
155        F32x4(core::array::from_fn(|i| self.0[i] - rhs.0[i]))
156    }
157}
158
159impl Mul for F32x4 {
160    type Output = Self;
161
162    fn mul(self, rhs: Self) -> Self {
163        F32x4(core::array::from_fn(|i| self.0[i] * rhs.0[i]))
164    }
165}
166
167impl Div for F32x4 {
168    type Output = Self;
169
170    fn div(self, rhs: Self) -> Self {
171        F32x4(core::array::from_fn(|i| self.0[i] / rhs.0[i]))
172    }
173}
174
175impl Rem for F32x4 {
176    type Output = Self;
177
178    fn rem(self, rhs: Self) -> Self {
179        F32x4(core::array::from_fn(|i| self.0[i] % rhs.0[i]))
180    }
181}
182
183impl Neg for F32x4 {
184    type Output = Self;
185
186    fn neg(self) -> Self {
187        F32x4(core::array::from_fn(|i| -self.0[i]))
188    }
189}
190
191impl Default for F32x4 {
192    fn default() -> Self {
193        F32x4([0.0; 4])
194    }
195}
196
197// -----------------------------------------------------------------------------
198// Tests
199// -----------------------------------------------------------------------------
200
#[cfg(test)]
mod tests {
    use super::*;

    /// `splat` must place the value in both the first and the last lane.
    #[test]
    fn test_f32x4_splat() {
        let filled = F32x4::splat(2.5);
        for lane in [0, 3] {
            assert_eq!(filled.extract(lane), 2.5);
        }
    }

    /// The `+` operator adds lane 0 of both operands.
    #[test]
    fn test_f32x4_add() {
        let sum = F32x4::splat(1.0) + F32x4::splat(2.0);
        assert_eq!(sum.extract(0), 3.0);
    }

    /// The `*` operator multiplies lane 0 of both operands.
    #[test]
    fn test_f32x4_mul() {
        let product = F32x4::splat(3.0) * F32x4::splat(4.0);
        assert_eq!(product.extract(0), 12.0);
    }

    /// The sine of an all-zero vector is zero in lane 0.
    #[test]
    fn test_f32x4_sin() {
        let sine = F32x4::splat(0.0).sin();
        assert_eq!(sine.extract(0), 0.0);
    }
}
234}