1use scirs2_core::ndarray::{Array1, Array2};
7use scirs2_core::random::{Distribution, RandNormal, Random};
8
9#[cfg(target_arch = "x86_64")]
10use std::arch::x86_64::*;
11
/// Runtime-detected x86_64 SIMD feature flags.
///
/// On non-x86_64 targets every flag is `false` and all callers fall back
/// to the scalar code paths.
#[derive(Debug, Clone, Copy)]
pub struct SimdCapabilities {
    pub has_sse2: bool,
    pub has_avx: bool,
    pub has_avx2: bool,
    pub has_fma: bool,
}

impl SimdCapabilities {
    /// Probe the running CPU for the instruction-set extensions used by
    /// this module.
    pub fn detect() -> Self {
        #[cfg(target_arch = "x86_64")]
        let caps = Self {
            has_sse2: is_x86_feature_detected!("sse2"),
            has_avx: is_x86_feature_detected!("avx"),
            has_avx2: is_x86_feature_detected!("avx2"),
            has_fma: is_x86_feature_detected!("fma"),
        };

        #[cfg(not(target_arch = "x86_64"))]
        let caps = Self {
            has_sse2: false,
            has_avx: false,
            has_avx2: false,
            has_fma: false,
        };

        caps
    }

    /// True when at least one vector extension (SSE2/AVX/AVX2) is present.
    /// FMA is intentionally not counted: it only accelerates fused ops and
    /// never enables a code path on its own here.
    pub fn has_simd(&self) -> bool {
        [self.has_sse2, self.has_avx, self.has_avx2]
            .iter()
            .any(|&flag| flag)
    }
}
49
50#[cfg(target_arch = "x86_64")]
52pub fn generate_normal_matrix_simd(
53 n_samples: usize,
54 n_features: usize,
55 mean: f64,
56 std: f64,
57 random_state: Option<u64>,
58) -> Array2<f64> {
59 let caps = SimdCapabilities::detect();
60
61 if caps.has_avx {
62 unsafe { generate_normal_matrix_avx(n_samples, n_features, mean, std, random_state) }
63 } else if caps.has_sse2 {
64 unsafe { generate_normal_matrix_sse(n_samples, n_features, mean, std, random_state) }
65 } else {
66 generate_normal_matrix_scalar(n_samples, n_features, mean, std, random_state)
67 }
68}
69
/// Non-x86_64 fallback: no SIMD paths exist, so delegate straight to the
/// scalar generator. Same signature and same seeded output as the x86_64
/// version.
#[cfg(not(target_arch = "x86_64"))]
pub fn generate_normal_matrix_simd(
    n_samples: usize,
    n_features: usize,
    mean: f64,
    std: f64,
    random_state: Option<u64>,
) -> Array2<f64> {
    generate_normal_matrix_scalar(n_samples, n_features, mean, std, random_state)
}
81
82fn generate_normal_matrix_scalar(
84 n_samples: usize,
85 n_features: usize,
86 mean: f64,
87 std: f64,
88 random_state: Option<u64>,
89) -> Array2<f64> {
90 let mut rng = Random::seed(random_state.unwrap_or(42));
91
92 let normal = RandNormal::new(mean, std).unwrap();
93 Array2::from_shape_fn((n_samples, n_features), |_| normal.sample(&mut rng))
94}
95
96#[cfg(target_arch = "x86_64")]
98#[target_feature(enable = "sse2")]
99unsafe fn generate_normal_matrix_sse(
100 n_samples: usize,
101 n_features: usize,
102 mean: f64,
103 std: f64,
104 random_state: Option<u64>,
105) -> Array2<f64> {
106 let mut rng = Random::seed(random_state.unwrap_or(42));
107
108 let normal = RandNormal::new(mean, std).unwrap();
109 let total = n_samples * n_features;
110 let mut data = Vec::with_capacity(total);
111
112 let chunks = total / 2;
114 for _ in 0..chunks {
115 let v1 = normal.sample(&mut rng);
116 let v2 = normal.sample(&mut rng);
117 data.push(v1);
118 data.push(v2);
119 }
120
121 for _ in 0..(total % 2) {
123 data.push(normal.sample(&mut rng));
124 }
125
126 Array2::from_shape_vec((n_samples, n_features), data).unwrap()
127}
128
129#[cfg(target_arch = "x86_64")]
131#[target_feature(enable = "avx")]
132unsafe fn generate_normal_matrix_avx(
133 n_samples: usize,
134 n_features: usize,
135 mean: f64,
136 std: f64,
137 random_state: Option<u64>,
138) -> Array2<f64> {
139 let mut rng = Random::seed(random_state.unwrap_or(42));
140
141 let normal = RandNormal::new(mean, std).unwrap();
142 let total = n_samples * n_features;
143 let mut data = Vec::with_capacity(total);
144
145 let chunks = total / 4;
147 for _ in 0..chunks {
148 for _ in 0..4 {
150 data.push(normal.sample(&mut rng));
151 }
152 }
153
154 for _ in 0..(total % 4) {
156 data.push(normal.sample(&mut rng));
157 }
158
159 Array2::from_shape_vec((n_samples, n_features), data).unwrap()
160}
161
162#[cfg(target_arch = "x86_64")]
164pub fn add_vectors_simd(a: &[f64], b: &[f64], result: &mut [f64]) {
165 assert_eq!(a.len(), b.len());
166 assert_eq!(a.len(), result.len());
167
168 let caps = SimdCapabilities::detect();
169
170 if caps.has_avx {
171 unsafe { add_vectors_avx(a, b, result) };
172 } else if caps.has_sse2 {
173 unsafe { add_vectors_sse2(a, b, result) };
174 } else {
175 add_vectors_scalar(a, b, result);
176 }
177}
178
/// Element-wise addition fallback for non-x86_64 targets.
///
/// Panics if the slice lengths differ. The length asserts mirror the
/// validation the x86_64 version performs, so a mismatch fails the same
/// way on every architecture instead of silently truncating or panicking
/// mid-loop inside the scalar helper.
#[cfg(not(target_arch = "x86_64"))]
pub fn add_vectors_simd(a: &[f64], b: &[f64], result: &mut [f64]) {
    assert_eq!(a.len(), b.len());
    assert_eq!(a.len(), result.len());

    add_vectors_scalar(a, b, result);
}
183
/// Scalar fallback: element-wise sum of `a` and `b` into `result`.
///
/// The public wrappers assert equal lengths before calling; if `b` or
/// `result` is nevertheless shorter than `a`, the reslice below panics up
/// front, just as the original per-element indexing would have.
fn add_vectors_scalar(a: &[f64], b: &[f64], result: &mut [f64]) {
    let n = a.len();
    // Reslice once so LLVM can drop the per-iteration bounds checks and
    // auto-vectorize the loop.
    let (b, result) = (&b[..n], &mut result[..n]);
    for i in 0..n {
        result[i] = a[i] + b[i];
    }
}
189
/// SSE2 element-wise addition: two f64 lanes per iteration via unaligned
/// 128-bit loads/stores, with a scalar loop for any odd trailing element.
///
/// # Safety
/// - The CPU must support SSE2 (the `add_vectors_simd` dispatcher verifies
///   this at runtime before calling).
/// - `b` and `result` must be at least `a.len()` elements long; the public
///   wrapper asserts all three lengths are equal.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse2")]
unsafe fn add_vectors_sse2(a: &[f64], b: &[f64], result: &mut [f64]) {
    let len = a.len();
    let chunks = len / 2;

    for i in 0..chunks {
        let idx = i * 2;
        // idx + 1 < len because i < len / 2, so each 128-bit access stays
        // within the first `len` elements.
        let va = _mm_loadu_pd(a.as_ptr().add(idx));
        let vb = _mm_loadu_pd(b.as_ptr().add(idx));
        let vr = _mm_add_pd(va, vb);
        _mm_storeu_pd(result.as_mut_ptr().add(idx), vr);
    }

    // Scalar tail: at most one leftover element when len is odd.
    for i in (chunks * 2)..len {
        result[i] = a[i] + b[i];
    }
}
209
/// AVX element-wise addition: four f64 lanes per iteration via unaligned
/// 256-bit loads/stores, with a scalar loop for up to three tail elements.
///
/// # Safety
/// - The CPU must support AVX (the `add_vectors_simd` dispatcher verifies
///   this at runtime before calling).
/// - `b` and `result` must be at least `a.len()` elements long; the public
///   wrapper asserts all three lengths are equal.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx")]
unsafe fn add_vectors_avx(a: &[f64], b: &[f64], result: &mut [f64]) {
    let len = a.len();
    let chunks = len / 4;

    for i in 0..chunks {
        let idx = i * 4;
        // idx + 3 < len because i < len / 4, so each 256-bit access stays
        // within the first `len` elements.
        let va = _mm256_loadu_pd(a.as_ptr().add(idx));
        let vb = _mm256_loadu_pd(b.as_ptr().add(idx));
        let vr = _mm256_add_pd(va, vb);
        _mm256_storeu_pd(result.as_mut_ptr().add(idx), vr);
    }

    // Scalar tail: handles len % 4 leftover elements.
    for i in (chunks * 4)..len {
        result[i] = a[i] + b[i];
    }
}
229
230#[cfg(target_arch = "x86_64")]
232pub fn scale_vector_simd(data: &[f64], scale: f64, result: &mut [f64]) {
233 assert_eq!(data.len(), result.len());
234
235 let caps = SimdCapabilities::detect();
236
237 if caps.has_avx {
238 unsafe { scale_vector_avx(data, scale, result) };
239 } else if caps.has_sse2 {
240 unsafe { scale_vector_sse2(data, scale, result) };
241 } else {
242 scale_vector_scalar(data, scale, result);
243 }
244}
245
/// Scaling fallback for non-x86_64 targets.
///
/// Panics if the slice lengths differ. The length assert mirrors the
/// validation the x86_64 version performs, so a mismatch fails identically
/// on every architecture instead of surfacing as an indexing panic inside
/// the scalar helper.
#[cfg(not(target_arch = "x86_64"))]
pub fn scale_vector_simd(data: &[f64], scale: f64, result: &mut [f64]) {
    assert_eq!(data.len(), result.len());

    scale_vector_scalar(data, scale, result);
}
250
/// Scalar fallback: write `data[i] * scale` into `result[i]`.
///
/// The public wrappers assert equal lengths before calling; if `result`
/// is nevertheless shorter than `data`, the reslice below panics up
/// front, just as the original per-element indexing would have.
fn scale_vector_scalar(data: &[f64], scale: f64, result: &mut [f64]) {
    // Reslice once so LLVM can drop per-iteration bounds checks, then use
    // a zip so no indexing remains at all.
    let result = &mut result[..data.len()];
    for (out, &x) in result.iter_mut().zip(data) {
        *out = x * scale;
    }
}
256
/// SSE2 scaling: two f64 lanes per iteration multiplied by a broadcast
/// scale value, with a scalar loop for any odd trailing element.
///
/// # Safety
/// - The CPU must support SSE2 (the `scale_vector_simd` dispatcher
///   verifies this at runtime before calling).
/// - `result` must be at least `data.len()` elements long; the public
///   wrapper asserts the lengths are equal.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse2")]
unsafe fn scale_vector_sse2(data: &[f64], scale: f64, result: &mut [f64]) {
    let len = data.len();
    let chunks = len / 2;
    // Broadcast `scale` into both 64-bit lanes once, outside the loop.
    let vscale = _mm_set1_pd(scale);

    for i in 0..chunks {
        let idx = i * 2;
        // idx + 1 < len because i < len / 2, so each 128-bit access stays
        // within the first `len` elements.
        let vdata = _mm_loadu_pd(data.as_ptr().add(idx));
        let vr = _mm_mul_pd(vdata, vscale);
        _mm_storeu_pd(result.as_mut_ptr().add(idx), vr);
    }

    // Scalar tail: at most one leftover element when len is odd.
    for i in (chunks * 2)..len {
        result[i] = data[i] * scale;
    }
}
276
/// AVX scaling: four f64 lanes per iteration multiplied by a broadcast
/// scale value, with a scalar loop for up to three tail elements.
///
/// # Safety
/// - The CPU must support AVX (the `scale_vector_simd` dispatcher verifies
///   this at runtime before calling).
/// - `result` must be at least `data.len()` elements long; the public
///   wrapper asserts the lengths are equal.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx")]
unsafe fn scale_vector_avx(data: &[f64], scale: f64, result: &mut [f64]) {
    let len = data.len();
    let chunks = len / 4;
    // Broadcast `scale` into all four 64-bit lanes once, outside the loop.
    let vscale = _mm256_set1_pd(scale);

    for i in 0..chunks {
        let idx = i * 4;
        // idx + 3 < len because i < len / 4, so each 256-bit access stays
        // within the first `len` elements.
        let vdata = _mm256_loadu_pd(data.as_ptr().add(idx));
        let vr = _mm256_mul_pd(vdata, vscale);
        _mm256_storeu_pd(result.as_mut_ptr().add(idx), vr);
    }

    // Scalar tail: handles len % 4 leftover elements.
    for i in (chunks * 4)..len {
        result[i] = data[i] * scale;
    }
}
296
297pub fn make_classification_simd(
299 n_samples: usize,
300 n_features: usize,
301 n_classes: usize,
302 class_sep: f64,
303 random_state: Option<u64>,
304) -> (Array2<f64>, Array1<i32>) {
305 let features = generate_normal_matrix_simd(n_samples, n_features, 0.0, 1.0, random_state);
307
308 let targets = Array1::from_shape_fn(n_samples, |i| (i % n_classes) as i32);
310
311 let mut separated_features = features.clone();
313 for (i, &target) in targets.iter().enumerate() {
314 let offset = target as f64 * class_sep;
315 let mut row = separated_features.row_mut(i);
316 let row_slice = row.as_slice_mut().unwrap();
317
318 let offset_vec = vec![offset; n_features];
319 add_vectors_simd(features.row(i).as_slice().unwrap(), &offset_vec, row_slice);
320 }
321
322 (separated_features, targets)
323}
324
325pub fn make_regression_simd(
327 n_samples: usize,
328 n_features: usize,
329 noise: f64,
330 random_state: Option<u64>,
331) -> (Array2<f64>, Array1<f64>) {
332 let features = generate_normal_matrix_simd(n_samples, n_features, 0.0, 1.0, random_state);
334
335 let mut rng = Random::seed(random_state.unwrap_or(42).wrapping_add(1));
337
338 let normal_coef = RandNormal::new(0.0, 1.0).unwrap();
339 let coef = Array1::from_shape_fn(n_features, |_| normal_coef.sample(&mut rng));
340
341 let targets = features.dot(&coef);
343
344 if noise > 0.0 {
346 let normal_noise = RandNormal::new(0.0, noise).unwrap();
347 let noise_vec = Array1::from_shape_fn(n_samples, |_| normal_noise.sample(&mut rng));
348 let mut targets_with_noise = vec![0.0; n_samples];
349 add_vectors_simd(
350 targets.as_slice().unwrap(),
351 noise_vec.as_slice().unwrap(),
352 &mut targets_with_noise,
353 );
354 (features, Array1::from_vec(targets_with_noise))
355 } else {
356 (features, targets)
357 }
358}
359
#[cfg(test)]
mod tests {
    use super::*;

    // Informational only: prints what the host CPU supports. Has no
    // assertions because feature availability varies by machine.
    #[test]
    fn test_simd_capabilities() {
        let caps = SimdCapabilities::detect();
        println!("SIMD Capabilities:");
        println!("  SSE2: {}", caps.has_sse2);
        println!("  AVX: {}", caps.has_avx);
        println!("  AVX2: {}", caps.has_avx2);
        println!("  FMA: {}", caps.has_fma);
    }

    #[test]
    fn test_generate_normal_matrix_simd() {
        let matrix = generate_normal_matrix_simd(100, 10, 0.0, 1.0, Some(42));
        assert_eq!(matrix.nrows(), 100);
        assert_eq!(matrix.ncols(), 10);

        // Probabilistic sanity bound: |N(0,1)| < 5 is ~5 sigma; with the
        // fixed seed above this is deterministic in practice.
        for &val in matrix.iter() {
            assert!(val.abs() < 5.0);
        }
    }

    #[test]
    fn test_add_vectors_simd() {
        // Odd length (5) exercises both the vector body and the scalar tail.
        let a = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let b = vec![5.0, 4.0, 3.0, 2.0, 1.0];
        let mut result = vec![0.0; 5];

        add_vectors_simd(&a, &b, &mut result);

        assert_eq!(result, vec![6.0, 6.0, 6.0, 6.0, 6.0]);
    }

    #[test]
    fn test_scale_vector_simd() {
        // Odd length (5) exercises both the vector body and the scalar tail.
        let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let mut result = vec![0.0; 5];

        scale_vector_simd(&data, 2.0, &mut result);

        assert_eq!(result, vec![2.0, 4.0, 6.0, 8.0, 10.0]);
    }

    #[test]
    fn test_make_classification_simd() {
        let (features, targets) = make_classification_simd(100, 5, 3, 1.0, Some(42));

        assert_eq!(features.nrows(), 100);
        assert_eq!(features.ncols(), 5);
        assert_eq!(targets.len(), 100);

        // Round-robin labeling over 100 samples must cover all 3 classes.
        let mut has_class = vec![false; 3];
        for &target in targets.iter() {
            assert!(target >= 0 && target < 3);
            has_class[target as usize] = true;
        }
        assert!(has_class.iter().all(|&x| x));
    }

    #[test]
    fn test_make_regression_simd() {
        let (features, targets) = make_regression_simd(100, 5, 0.1, Some(42));

        assert_eq!(features.nrows(), 100);
        assert_eq!(features.ncols(), 5);
        assert_eq!(targets.len(), 100);
    }

    // The SIMD-dispatched generators draw from the RNG in the same order
    // as the scalar path, so exact (bitwise) float equality is expected.
    #[test]
    fn test_simd_vs_scalar_consistency() {
        let seed = Some(42);
        let matrix_simd = generate_normal_matrix_simd(50, 10, 0.0, 1.0, seed);
        let matrix_scalar = generate_normal_matrix_scalar(50, 10, 0.0, 1.0, seed);

        assert_eq!(matrix_simd.shape(), matrix_scalar.shape());

        for i in 0..matrix_simd.nrows() {
            for j in 0..matrix_simd.ncols() {
                assert_eq!(matrix_simd[[i, j]], matrix_scalar[[i, j]]);
            }
        }
    }
}