mathhook_core/core/performance/
simd.rs1pub struct SimdOps;
6
7impl SimdOps {
8 #[inline(always)]
10 pub fn add_f64_array(a: &[f64], b: &[f64], result: &mut [f64]) {
11 let len = a.len().min(b.len()).min(result.len());
12
13 let chunks = len / 4;
15 let remainder = len % 4;
16
17 for i in 0..chunks {
18 let base = i * 4;
19 result[base] = a[base] + b[base];
21 result[base + 1] = a[base + 1] + b[base + 1];
22 result[base + 2] = a[base + 2] + b[base + 2];
23 result[base + 3] = a[base + 3] + b[base + 3];
24 }
25
26 for i in (chunks * 4)..(chunks * 4 + remainder) {
28 result[i] = a[i] + b[i];
29 }
30 }
31
32 #[inline(always)]
34 pub fn mul_f64_array(a: &[f64], b: &[f64], result: &mut [f64]) {
35 let len = a.len().min(b.len()).min(result.len());
36
37 let chunks = len / 4;
39 let remainder = len % 4;
40
41 for i in 0..chunks {
42 let base = i * 4;
43 result[base] = a[base] * b[base];
44 result[base + 1] = a[base + 1] * b[base + 1];
45 result[base + 2] = a[base + 2] * b[base + 2];
46 result[base + 3] = a[base + 3] * b[base + 3];
47 }
48
49 for i in (chunks * 4)..(chunks * 4 + remainder) {
50 result[i] = a[i] * b[i];
51 }
52 }
53
54 #[inline(always)]
56 pub fn add_i32_array(a: &[i32], b: &[i32], result: &mut [i32]) {
57 let len = a.len().min(b.len()).min(result.len());
58
59 let chunks = len / 8;
61 let remainder = len % 8;
62
63 for i in 0..chunks {
64 let base = i * 8;
65 result[base] = a[base] + b[base];
67 result[base + 1] = a[base + 1] + b[base + 1];
68 result[base + 2] = a[base + 2] + b[base + 2];
69 result[base + 3] = a[base + 3] + b[base + 3];
70 result[base + 4] = a[base + 4] + b[base + 4];
71 result[base + 5] = a[base + 5] + b[base + 5];
72 result[base + 6] = a[base + 6] + b[base + 6];
73 result[base + 7] = a[base + 7] + b[base + 7];
74 }
75
76 for i in (chunks * 8)..(chunks * 8 + remainder) {
77 result[i] = a[i] + b[i];
78 }
79 }
80
81 #[inline(always)]
83 pub fn evaluate_polynomial_simd(coefficients: &[f64], x: f64) -> f64 {
84 if coefficients.is_empty() {
85 return 0.0;
86 }
87
88 let mut result = coefficients[coefficients.len() - 1];
90
91 for &coeff in coefficients.iter().rev().skip(1) {
93 result = result * x + coeff;
94 }
95
96 result
97 }
98
99 #[inline(always)]
101 pub fn dot_product_f64(a: &[f64], b: &[f64]) -> f64 {
102 let len = a.len().min(b.len());
103 let mut sum = 0.0;
104
105 let chunks = len / 4;
107 let remainder = len % 4;
108
109 for i in 0..chunks {
110 let base = i * 4;
111 sum += a[base] * b[base]
112 + a[base + 1] * b[base + 1]
113 + a[base + 2] * b[base + 2]
114 + a[base + 3] * b[base + 3];
115 }
116
117 for i in (chunks * 4)..(chunks * 4 + remainder) {
118 sum += a[i] * b[i];
119 }
120
121 sum
122 }
123}
124
125pub struct SimdOptimized;
127
128impl SimdOptimized {
129 pub fn bulk_add_numeric(values: &[f64]) -> f64 {
131 let mut sum = 0.0;
133 let chunks = values.len() / 4;
134 for i in 0..chunks {
135 let base = i * 4;
136 sum += values[base] + values[base + 1] + values[base + 2] + values[base + 3];
137 }
138
139 sum += values[(chunks * 4)..].iter().sum::<f64>();
140
141 sum
142 }
143}
144
145#[cfg(test)]
146mod tests {
147 use super::*;
148
149 #[test]
150 fn test_simd_f64_addition() {
151 let a = vec![1.0, 2.0, 3.0, 4.0, 5.0];
152 let b = vec![5.0, 4.0, 3.0, 2.0, 1.0];
153 let mut result = vec![0.0; 5];
154
155 SimdOps::add_f64_array(&a, &b, &mut result);
156
157 assert_eq!(result, vec![6.0, 6.0, 6.0, 6.0, 6.0]);
158 }
159
160 #[test]
161 fn test_simd_i32_addition() {
162 let a = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
163 let b = vec![10, 9, 8, 7, 6, 5, 4, 3, 2, 1];
164 let mut result = vec![0; 10];
165
166 SimdOps::add_i32_array(&a, &b, &mut result);
167
168 assert_eq!(result, vec![11, 11, 11, 11, 11, 11, 11, 11, 11, 11]);
169 }
170
171 #[test]
172 fn test_polynomial_evaluation() {
173 let coefficients = vec![1.0, 3.0, 2.0];
175 let x = 2.0;
176
177 let result = SimdOps::evaluate_polynomial_simd(&coefficients, x);
178
179 assert_eq!(result, 15.0);
181 }
182
183 #[test]
184 fn test_dot_product() {
185 let a = vec![1.0, 2.0, 3.0, 4.0];
186 let b = vec![4.0, 3.0, 2.0, 1.0];
187
188 let result = SimdOps::dot_product_f64(&a, &b);
189
190 assert_eq!(result, 20.0);
192 }
193
194 #[test]
195 fn test_simd_benefits() {
196 use std::time::Instant;
197
198 let size = 10000;
199 let a: Vec<f64> = (0..size).map(|i| i as f64).collect();
200 let b: Vec<f64> = (0..size).map(|i| (size - i) as f64).collect();
201 let mut result = vec![0.0; size];
202
203 let start = Instant::now();
204 SimdOps::add_f64_array(&a, &b, &mut result);
205 let simd_duration = start.elapsed();
206
207 let start = Instant::now();
208 for i in 0..size {
209 result[i] = a[i] + b[i];
210 }
211 let scalar_duration = start.elapsed();
212
213 println!(
214 "SIMD-like: {:?}, Scalar: {:?}",
215 simd_duration, scalar_duration
216 );
217
218 println!("SIMD performance test completed successfully");
221 assert!(simd_duration.as_nanos() < 1_000_000_000); }
223}