sklears_datasets/simd_gen.rs

//! SIMD-optimized dataset generation
//!
//! This module provides SIMD-accelerated implementations of common dataset
//! generation operations for improved performance on supported platforms.
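//!
//! A minimal end-to-end sketch (marked `ignore` because the crate path this
//! module is exposed under is assumed, not confirmed here):
//!
//! ```ignore
//! // Generate a 100 x 4 feature matrix of N(0, 1) draws, then scale one row.
//! let x = generate_normal_matrix_simd(100, 4, 0.0, 1.0, Some(42));
//! let row = x.row(0);
//! let mut scaled = vec![0.0; 4];
//! scale_vector_simd(row.as_slice().unwrap(), 2.0, &mut scaled);
//! ```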

use scirs2_core::ndarray::{Array1, Array2};
use scirs2_core::random::{Distribution, RandNormal, Random};

#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;

/// SIMD capabilities detected at runtime
#[derive(Debug, Clone, Copy)]
pub struct SimdCapabilities {
    pub has_sse2: bool,
    pub has_avx: bool,
    pub has_avx2: bool,
    pub has_fma: bool,
}

impl SimdCapabilities {
    /// Detect available SIMD features
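    ///
    /// A minimal usage sketch (illustrative only; `has_simd` is defined
    /// below in this impl):
    ///
    /// ```ignore
    /// let caps = SimdCapabilities::detect();
    /// if caps.has_simd() {
    ///     // take a vectorized code path
    /// }
    /// ```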
    pub fn detect() -> Self {
        #[cfg(target_arch = "x86_64")]
        {
            Self {
                has_sse2: is_x86_feature_detected!("sse2"),
                has_avx: is_x86_feature_detected!("avx"),
                has_avx2: is_x86_feature_detected!("avx2"),
                has_fma: is_x86_feature_detected!("fma"),
            }
        }
        #[cfg(not(target_arch = "x86_64"))]
        {
            Self {
                has_sse2: false,
                has_avx: false,
                has_avx2: false,
                has_fma: false,
            }
        }
    }

    /// Check if any SIMD features are available
    pub fn has_simd(&self) -> bool {
        self.has_sse2 || self.has_avx || self.has_avx2
    }
}

/// SIMD-optimized feature matrix generation with normal distribution
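///
/// Dispatches at runtime to the best available implementation. A minimal
/// sketch of the intended call (marked `ignore`; shapes and seed are
/// illustrative):
///
/// ```ignore
/// let x = generate_normal_matrix_simd(1000, 20, 0.0, 1.0, Some(7));
/// assert_eq!(x.shape(), &[1000, 20]);
/// ```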
#[cfg(target_arch = "x86_64")]
pub fn generate_normal_matrix_simd(
    n_samples: usize,
    n_features: usize,
    mean: f64,
    std: f64,
    random_state: Option<u64>,
) -> Array2<f64> {
    let caps = SimdCapabilities::detect();

    if caps.has_avx {
        unsafe { generate_normal_matrix_avx(n_samples, n_features, mean, std, random_state) }
    } else if caps.has_sse2 {
        unsafe { generate_normal_matrix_sse(n_samples, n_features, mean, std, random_state) }
    } else {
        generate_normal_matrix_scalar(n_samples, n_features, mean, std, random_state)
    }
}

/// Fallback for non-x86_64 architectures
#[cfg(not(target_arch = "x86_64"))]
pub fn generate_normal_matrix_simd(
    n_samples: usize,
    n_features: usize,
    mean: f64,
    std: f64,
    random_state: Option<u64>,
) -> Array2<f64> {
    generate_normal_matrix_scalar(n_samples, n_features, mean, std, random_state)
}

/// Scalar fallback implementation
fn generate_normal_matrix_scalar(
    n_samples: usize,
    n_features: usize,
    mean: f64,
    std: f64,
    random_state: Option<u64>,
) -> Array2<f64> {
    let mut rng = Random::seed(random_state.unwrap_or(42));

    let normal = RandNormal::new(mean, std).unwrap();
    Array2::from_shape_fn((n_samples, n_features), |_| normal.sample(&mut rng))
}

/// SSE2-optimized implementation
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse2")]
unsafe fn generate_normal_matrix_sse(
    n_samples: usize,
    n_features: usize,
    mean: f64,
    std: f64,
    random_state: Option<u64>,
) -> Array2<f64> {
    let mut rng = Random::seed(random_state.unwrap_or(42));

    let normal = RandNormal::new(mean, std).unwrap();
    let total = n_samples * n_features;
    let mut data = Vec::with_capacity(total);

    // Draw samples in pairs to mirror the SSE2 lane width (128-bit = 2 x f64).
    // The RNG draws themselves are scalar; #[target_feature(enable = "sse2")]
    // allows the compiler to vectorize the surrounding loop where it can.
    let chunks = total / 2;
    for _ in 0..chunks {
        let v1 = normal.sample(&mut rng);
        let v2 = normal.sample(&mut rng);
        data.push(v1);
        data.push(v2);
    }

    // Handle remaining elements
    for _ in 0..(total % 2) {
        data.push(normal.sample(&mut rng));
    }

    Array2::from_shape_vec((n_samples, n_features), data).unwrap()
}

/// AVX-optimized implementation
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx")]
unsafe fn generate_normal_matrix_avx(
    n_samples: usize,
    n_features: usize,
    mean: f64,
    std: f64,
    random_state: Option<u64>,
) -> Array2<f64> {
    let mut rng = Random::seed(random_state.unwrap_or(42));

    let normal = RandNormal::new(mean, std).unwrap();
    let total = n_samples * n_features;
    let mut data = Vec::with_capacity(total);

    // Draw samples in groups of four to mirror the AVX lane width (256-bit = 4 x f64).
    // As above, the RNG draws are scalar; #[target_feature(enable = "avx")]
    // enables AVX code generation for this function.
    let chunks = total / 4;
    for _ in 0..chunks {
        // Generate 4 random values
        for _ in 0..4 {
            data.push(normal.sample(&mut rng));
        }
    }

    // Handle remaining elements
    for _ in 0..(total % 4) {
        data.push(normal.sample(&mut rng));
    }

    Array2::from_shape_vec((n_samples, n_features), data).unwrap()
}

/// SIMD-optimized vector addition
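///
/// Panics if the slice lengths differ (the `assert_eq!` guards below).
/// A minimal sketch of the intended call (marked `ignore`):
///
/// ```ignore
/// let a = [1.0, 2.0, 3.0];
/// let b = [4.0, 5.0, 6.0];
/// let mut out = [0.0; 3];
/// add_vectors_simd(&a, &b, &mut out);
/// assert_eq!(out, [5.0, 7.0, 9.0]);
/// ```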
#[cfg(target_arch = "x86_64")]
pub fn add_vectors_simd(a: &[f64], b: &[f64], result: &mut [f64]) {
    assert_eq!(a.len(), b.len());
    assert_eq!(a.len(), result.len());

    let caps = SimdCapabilities::detect();

    if caps.has_avx {
        unsafe { add_vectors_avx(a, b, result) };
    } else if caps.has_sse2 {
        unsafe { add_vectors_sse2(a, b, result) };
    } else {
        add_vectors_scalar(a, b, result);
    }
}

#[cfg(not(target_arch = "x86_64"))]
pub fn add_vectors_simd(a: &[f64], b: &[f64], result: &mut [f64]) {
    // Same length guards as the x86_64 version, for consistent panic behavior
    assert_eq!(a.len(), b.len());
    assert_eq!(a.len(), result.len());
    add_vectors_scalar(a, b, result);
}

fn add_vectors_scalar(a: &[f64], b: &[f64], result: &mut [f64]) {
    for i in 0..a.len() {
        result[i] = a[i] + b[i];
    }
}

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse2")]
unsafe fn add_vectors_sse2(a: &[f64], b: &[f64], result: &mut [f64]) {
    let len = a.len();
    let chunks = len / 2;

    for i in 0..chunks {
        let idx = i * 2;
        let va = _mm_loadu_pd(a.as_ptr().add(idx));
        let vb = _mm_loadu_pd(b.as_ptr().add(idx));
        let vr = _mm_add_pd(va, vb);
        _mm_storeu_pd(result.as_mut_ptr().add(idx), vr);
    }

    // Handle remaining elements
    for i in (chunks * 2)..len {
        result[i] = a[i] + b[i];
    }
}

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx")]
unsafe fn add_vectors_avx(a: &[f64], b: &[f64], result: &mut [f64]) {
    let len = a.len();
    let chunks = len / 4;

    for i in 0..chunks {
        let idx = i * 4;
        let va = _mm256_loadu_pd(a.as_ptr().add(idx));
        let vb = _mm256_loadu_pd(b.as_ptr().add(idx));
        let vr = _mm256_add_pd(va, vb);
        _mm256_storeu_pd(result.as_mut_ptr().add(idx), vr);
    }

    // Handle remaining elements
    for i in (chunks * 4)..len {
        result[i] = a[i] + b[i];
    }
}

/// SIMD-optimized scalar multiplication
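///
/// Panics if `data` and `result` differ in length. A minimal sketch of the
/// intended call (marked `ignore`):
///
/// ```ignore
/// let data = [1.0, 2.0, 3.0];
/// let mut out = [0.0; 3];
/// scale_vector_simd(&data, 2.0, &mut out);
/// assert_eq!(out, [2.0, 4.0, 6.0]);
/// ```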
#[cfg(target_arch = "x86_64")]
pub fn scale_vector_simd(data: &[f64], scale: f64, result: &mut [f64]) {
    assert_eq!(data.len(), result.len());

    let caps = SimdCapabilities::detect();

    if caps.has_avx {
        unsafe { scale_vector_avx(data, scale, result) };
    } else if caps.has_sse2 {
        unsafe { scale_vector_sse2(data, scale, result) };
    } else {
        scale_vector_scalar(data, scale, result);
    }
}

#[cfg(not(target_arch = "x86_64"))]
pub fn scale_vector_simd(data: &[f64], scale: f64, result: &mut [f64]) {
    // Same length guard as the x86_64 version, for consistent panic behavior
    assert_eq!(data.len(), result.len());
    scale_vector_scalar(data, scale, result);
}

fn scale_vector_scalar(data: &[f64], scale: f64, result: &mut [f64]) {
    for i in 0..data.len() {
        result[i] = data[i] * scale;
    }
}

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse2")]
unsafe fn scale_vector_sse2(data: &[f64], scale: f64, result: &mut [f64]) {
    let len = data.len();
    let chunks = len / 2;
    let vscale = _mm_set1_pd(scale);

    for i in 0..chunks {
        let idx = i * 2;
        let vdata = _mm_loadu_pd(data.as_ptr().add(idx));
        let vr = _mm_mul_pd(vdata, vscale);
        _mm_storeu_pd(result.as_mut_ptr().add(idx), vr);
    }

    // Handle remaining elements
    for i in (chunks * 2)..len {
        result[i] = data[i] * scale;
    }
}

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx")]
unsafe fn scale_vector_avx(data: &[f64], scale: f64, result: &mut [f64]) {
    let len = data.len();
    let chunks = len / 4;
    let vscale = _mm256_set1_pd(scale);

    for i in 0..chunks {
        let idx = i * 4;
        let vdata = _mm256_loadu_pd(data.as_ptr().add(idx));
        let vr = _mm256_mul_pd(vdata, vscale);
        _mm256_storeu_pd(result.as_mut_ptr().add(idx), vr);
    }

    // Handle remaining elements
    for i in (chunks * 4)..len {
        result[i] = data[i] * scale;
    }
}

/// SIMD-optimized classification dataset generation
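///
/// Labels are assigned cyclically (`i % n_classes`) and each sample is shifted
/// by `target * class_sep` along every feature. A minimal sketch (marked
/// `ignore`; arguments are illustrative):
///
/// ```ignore
/// let (x, y) = make_classification_simd(150, 4, 3, 2.0, Some(0));
/// assert_eq!(x.nrows(), 150);
/// assert_eq!(y.len(), 150); // labels cycle through 0, 1, 2
/// ```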
pub fn make_classification_simd(
    n_samples: usize,
    n_features: usize,
    n_classes: usize,
    class_sep: f64,
    random_state: Option<u64>,
) -> (Array2<f64>, Array1<i32>) {
    // Generate features using SIMD
    let features = generate_normal_matrix_simd(n_samples, n_features, 0.0, 1.0, random_state);

    // Generate targets
    let targets = Array1::from_shape_fn(n_samples, |i| (i % n_classes) as i32);

    // Apply class separation using SIMD
    let mut separated_features = features.clone();
    for (i, &target) in targets.iter().enumerate() {
        let offset = target as f64 * class_sep;
        let mut row = separated_features.row_mut(i);
        let row_slice = row.as_slice_mut().unwrap();

        let offset_vec = vec![offset; n_features];
        add_vectors_simd(features.row(i).as_slice().unwrap(), &offset_vec, row_slice);
    }

    (separated_features, targets)
}

/// SIMD-optimized regression dataset generation
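///
/// Targets follow a random linear model of the features, `y = X · coef`, with
/// optional Gaussian noise of standard deviation `noise`. A minimal sketch
/// (marked `ignore`; arguments are illustrative):
///
/// ```ignore
/// let (x, y) = make_regression_simd(200, 10, 0.5, Some(0));
/// assert_eq!(x.nrows(), 200);
/// assert_eq!(y.len(), 200);
/// ```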
pub fn make_regression_simd(
    n_samples: usize,
    n_features: usize,
    noise: f64,
    random_state: Option<u64>,
) -> (Array2<f64>, Array1<f64>) {
    // Generate features using SIMD
    let features = generate_normal_matrix_simd(n_samples, n_features, 0.0, 1.0, random_state);

    // Generate coefficients
    let mut rng = Random::seed(random_state.unwrap_or(42).wrapping_add(1));

    let normal_coef = RandNormal::new(0.0, 1.0).unwrap();
    let coef = Array1::from_shape_fn(n_features, |_| normal_coef.sample(&mut rng));

    // Compute targets: y = X @ coef + noise
    let targets = features.dot(&coef);

    // Add noise using SIMD if requested
    if noise > 0.0 {
        let normal_noise = RandNormal::new(0.0, noise).unwrap();
        let noise_vec = Array1::from_shape_fn(n_samples, |_| normal_noise.sample(&mut rng));
        let mut targets_with_noise = vec![0.0; n_samples];
        add_vectors_simd(
            targets.as_slice().unwrap(),
            noise_vec.as_slice().unwrap(),
            &mut targets_with_noise,
        );
        (features, Array1::from_vec(targets_with_noise))
    } else {
        (features, targets)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_simd_capabilities() {
        let caps = SimdCapabilities::detect();
        println!("SIMD Capabilities:");
        println!("  SSE2: {}", caps.has_sse2);
        println!("  AVX: {}", caps.has_avx);
        println!("  AVX2: {}", caps.has_avx2);
        println!("  FMA: {}", caps.has_fma);
    }

    #[test]
    fn test_generate_normal_matrix_simd() {
        let matrix = generate_normal_matrix_simd(100, 10, 0.0, 1.0, Some(42));
        assert_eq!(matrix.nrows(), 100);
        assert_eq!(matrix.ncols(), 10);

        // Sanity-check magnitudes: for N(0, 1), |val| < 5 is essentially
        // certain across 1000 draws
        for &val in matrix.iter() {
            assert!(val.abs() < 5.0);
        }
    }

    #[test]
    fn test_add_vectors_simd() {
        let a = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let b = vec![5.0, 4.0, 3.0, 2.0, 1.0];
        let mut result = vec![0.0; 5];

        add_vectors_simd(&a, &b, &mut result);

        assert_eq!(result, vec![6.0, 6.0, 6.0, 6.0, 6.0]);
    }

    #[test]
    fn test_scale_vector_simd() {
        let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let mut result = vec![0.0; 5];

        scale_vector_simd(&data, 2.0, &mut result);

        assert_eq!(result, vec![2.0, 4.0, 6.0, 8.0, 10.0]);
    }

    #[test]
    fn test_make_classification_simd() {
        let (features, targets) = make_classification_simd(100, 5, 3, 1.0, Some(42));

        assert_eq!(features.nrows(), 100);
        assert_eq!(features.ncols(), 5);
        assert_eq!(targets.len(), 100);

        // Check that we have all classes
        let mut has_class = vec![false; 3];
        for &target in targets.iter() {
            assert!(target >= 0 && target < 3);
            has_class[target as usize] = true;
        }
        assert!(has_class.iter().all(|&x| x));
    }

    #[test]
    fn test_make_regression_simd() {
        let (features, targets) = make_regression_simd(100, 5, 0.1, Some(42));

        assert_eq!(features.nrows(), 100);
        assert_eq!(features.ncols(), 5);
        assert_eq!(targets.len(), 100);
    }

    #[test]
    fn test_simd_vs_scalar_consistency() {
        let seed = Some(42);
        let matrix_simd = generate_normal_matrix_simd(50, 10, 0.0, 1.0, seed);
        let matrix_scalar = generate_normal_matrix_scalar(50, 10, 0.0, 1.0, seed);

        // Both should produce same shape
        assert_eq!(matrix_simd.shape(), matrix_scalar.shape());

        // Values should be identical (same RNG seed, same sampling order)
        for i in 0..matrix_simd.nrows() {
            for j in 0..matrix_simd.ncols() {
                assert_eq!(matrix_simd[[i, j]], matrix_scalar[[i, j]]);
            }
        }
    }
}