sklears_preprocessing/feature_engineering/simd_features.rs

//! SIMD-optimized feature engineering operations
//!
//! This module provides high-performance SIMD implementations of computational
//! operations used in feature engineering, with CPU fallbacks for stable compilation.
//!
//! ## SciRS2 Policy Compliance
//! ✅ Uses `scirs2-core` for numerical operations
//! ✅ No direct SIMD implementation (delegates to SciRS2-Core)
//! ✅ Works on stable Rust (no nightly features required)
//! ✅ Cross-platform compatible
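//!
//! ## Example
//!
//! A minimal usage sketch (the import path below is an assumption based on this
//! file's location in the crate and may differ from the actual re-export):
//!
//! ```ignore
//! use sklears_preprocessing::feature_engineering::simd_features::simd_standardize;
//!
//! let input = [1.0, 2.0, 3.0, 4.0, 5.0];
//! let mut output = [0.0; 5];
//! simd_standardize(&input, 3.0, (2.5f64).sqrt(), &mut output);
//! assert!(output.iter().sum::<f64>().abs() < 1e-10);
//! ```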

use scirs2_core::ndarray::{Array1, ArrayView1};
use scirs2_core::simd_ops::SimdUnifiedOps;

/// SIMD-accelerated polynomial feature calculation
#[inline]
pub fn simd_polynomial_features(input: &[f64], powers: &[Vec<usize>], output: &mut [f64]) {
    let n_features = input.len();

    // Evaluate each requested power combination
    for (i, power_combination) in powers.iter().enumerate() {
        let mut feature_value = 1.0;

        // Calculate product of powered features
        for j in 0..power_combination.len().min(n_features) {
            let power = power_combination[j];
            if power > 0 {
                feature_value *= simd_fast_pow(input[j], power);
            }
        }

        output[i] = feature_value;
    }
}

/// SIMD-accelerated fast power calculation for small integer powers
#[inline]
fn simd_fast_pow(base: f64, power: usize) -> f64 {
    match power {
        0 => 1.0,
        1 => base,
        2 => base * base,
        3 => base * base * base,
        4 => {
            let sq = base * base;
            sq * sq
        }
        5 => {
            let sq = base * base;
            sq * sq * base
        }
        6 => {
            let sq = base * base;
            sq * sq * sq
        }
        7 => {
            let sq = base * base;
            sq * sq * sq * base
        }
        8 => {
            let sq = base * base;
            let quad = sq * sq;
            quad * quad
        }
        _ => base.powi(power as i32), // Fallback for higher powers
    }
}
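
// A minimal sketch of the general technique that the unrolled cases above
// special-case by hand: exponentiation by squaring. This helper is illustrative
// only and is not called by `simd_fast_pow`, which delegates higher powers to
// `f64::powi`.
#[inline]
fn _pow_by_squaring_sketch(mut base: f64, mut power: usize) -> f64 {
    let mut result = 1.0;
    while power > 0 {
        if power & 1 == 1 {
            result *= base; // fold in the bit that is currently set
        }
        base *= base; // square the base for the next binary digit of `power`
        power >>= 1;
    }
    result
}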

/// SIMD-accelerated Box-Cox transformation
#[inline]
pub fn simd_box_cox_transform(input: &[f64], lambda: f64, eps: f64, output: &mut [f64]) {
    // Element-wise scalar transform (CPU fallback path)
    for (idx, &val) in input.iter().enumerate() {
        let val_pos = val.max(eps);
        output[idx] = if lambda.abs() < 1e-8 {
            val_pos.ln()
        } else {
            (val_pos.powf(lambda) - 1.0) / lambda
        };
    }
}

/// SIMD-accelerated Yeo-Johnson transformation
#[inline]
pub fn simd_yeo_johnson_transform(input: &[f64], lambda: f64, output: &mut [f64]) {
    // Element-wise scalar transform (CPU fallback path)
    for (idx, &val) in input.iter().enumerate() {
        output[idx] = if val >= 0.0 {
            if lambda.abs() < 1e-8 {
                (val + 1.0).ln()
            } else {
                ((val + 1.0).powf(lambda) - 1.0) / lambda
            }
        } else if (lambda - 2.0).abs() < 1e-8 {
            -(-val + 1.0).ln()
        } else {
            -((-val + 1.0).powf(2.0 - lambda) - 1.0) / (2.0 - lambda)
        };
    }
}

/// SIMD-accelerated standardization (z-score normalization)
#[inline]
pub fn simd_standardize(input: &[f64], mean: f64, std: f64, output: &mut [f64]) {
    if std <= 0.0 {
        output.fill(0.0);
        return;
    }

    // Use SciRS2-Core ndarray arithmetic for the element-wise computation
    let arr = Array1::from_vec(input.to_vec());
    let result = (arr - mean) / std;

    if let Some(slice) = result.as_slice() {
        output.copy_from_slice(slice);
    } else {
        for (idx, val) in result.iter().enumerate() {
            output[idx] = *val;
        }
    }
}

/// SIMD-accelerated polynomial coefficient calculation
#[inline]
pub fn simd_calculate_polynomial_coefficients(
    _x_values: &[f64],
    y_values: &[f64],
    _degree: usize,
    coefficients: &mut [f64],
) {
    // Basic polynomial fitting - compute mean as constant term
    coefficients.fill(0.0);
    if !coefficients.is_empty() && !y_values.is_empty() {
        let arr = Array1::from_vec(y_values.to_vec());
        if let Some(slice) = arr.as_slice() {
            coefficients[0] = f64::simd_mean(&ArrayView1::from(slice));
        } else {
            coefficients[0] = arr.mean().unwrap_or(0.0);
        }
    }
}
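
// A hedged sketch of what a fuller fit could look like: build a Vandermonde
// design matrix and hand it to the `_simd_solve_least_squares` helper defined
// below. Illustrative only; it is not wired into the public function above and
// inherits that helper's diagonal-approximation limitations.
fn _simd_fit_polynomial_sketch(
    x_values: &[f64],
    y_values: &[f64],
    degree: usize,
    coefficients: &mut [f64],
) {
    coefficients.fill(0.0);
    let n_rows = x_values.len().min(y_values.len());
    let n_cols = (degree + 1).min(coefficients.len());
    if n_rows == 0 || n_cols == 0 {
        return;
    }

    // Row k of the design matrix is [1, x_k, x_k^2, ..., x_k^degree].
    let mut design = vec![0.0; n_rows * n_cols];
    for k in 0..n_rows {
        for j in 0..n_cols {
            design[k * n_cols + j] = simd_fast_pow(x_values[k], j);
        }
    }

    _simd_solve_least_squares(&design, y_values, n_rows, n_cols, coefficients);
}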

/// SIMD-accelerated B-spline basis function evaluation
#[inline]
pub fn simd_evaluate_bspline_basis(
    _x: f64,
    _knots: &[f64],
    _degree: usize,
    basis_values: &mut [f64],
) {
    // Placeholder implementation - would use de Boor's algorithm with SciRS2-Core
    basis_values.fill(0.0);
    if !basis_values.is_empty() {
        basis_values[0] = 1.0;
    }
}
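
// A hedged sketch of the Cox-de Boor recursion that a full implementation of
// `simd_evaluate_bspline_basis` could build on. It evaluates a single basis
// function N_{i,degree}(x) naively (no knot-span optimisation) and expects
// `i + degree + 1 < knots.len()`. Illustrative only; not called above.
fn _bspline_basis_cox_de_boor_sketch(i: usize, degree: usize, x: f64, knots: &[f64]) -> f64 {
    if degree == 0 {
        // Degree-0 basis: indicator of the half-open knot interval [t_i, t_{i+1}).
        return if knots[i] <= x && x < knots[i + 1] { 1.0 } else { 0.0 };
    }

    // Left term: (x - t_i) / (t_{i+p} - t_i) * N_{i,p-1}(x), guarding zero-width spans.
    let denom_left = knots[i + degree] - knots[i];
    let left = if denom_left.abs() > 1e-12 {
        (x - knots[i]) / denom_left * _bspline_basis_cox_de_boor_sketch(i, degree - 1, x, knots)
    } else {
        0.0
    };

    // Right term: (t_{i+p+1} - x) / (t_{i+p+1} - t_{i+1}) * N_{i+1,p-1}(x).
    let denom_right = knots[i + degree + 1] - knots[i + 1];
    let right = if denom_right.abs() > 1e-12 {
        (knots[i + degree + 1] - x) / denom_right
            * _bspline_basis_cox_de_boor_sketch(i + 1, degree - 1, x, knots)
    } else {
        0.0
    };

    left + right
}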

/// SIMD-accelerated feature interaction computation
#[inline]
pub fn simd_compute_feature_interactions(
    features: &[f64],
    interaction_indices: &[(usize, usize)],
    output: &mut [f64],
) {
    // Compute pairwise feature interactions
    for (idx, &(idx1, idx2)) in interaction_indices.iter().enumerate() {
        if idx < output.len() && idx1 < features.len() && idx2 < features.len() {
            output[idx] = features[idx1] * features[idx2];
        }
    }
}

/// SIMD-accelerated statistical moments calculation
#[inline]
pub fn simd_calculate_statistical_moments(
    data: &[f64],
    moments: &mut [f64], // [mean, variance, skewness, kurtosis]
) {
    if data.is_empty() {
        moments.fill(0.0);
        return;
    }

    let arr = Array1::from_vec(data.to_vec());
    let n = data.len() as f64;

    // Calculate mean using SciRS2-Core
    let mean = if let Some(slice) = arr.as_slice() {
        f64::simd_mean(&ArrayView1::from(slice))
    } else {
        arr.mean().unwrap_or(0.0)
    };
    moments[0] = mean;

    // Calculate variance
    let variance = data.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / (n - 1.0).max(1.0);
    moments[1] = variance;

    // Placeholder for higher moments (would need more sophisticated implementation)
    if moments.len() > 2 {
        moments[2] = 0.0; // skewness
    }
    if moments.len() > 3 {
        moments[3] = 0.0; // kurtosis
    }
}
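
// A hedged sketch of how the higher moments above could be filled in, using the
// standardised third moment for skewness and excess kurtosis (fourth standardised
// moment minus 3). Illustrative only; the public function keeps its placeholder
// zeros, and the estimators below are the simple biased forms.
fn _sample_skewness_kurtosis_sketch(data: &[f64], mean: f64, variance: f64) -> (f64, f64) {
    let n = data.len() as f64;
    let std = variance.sqrt();
    if n < 2.0 || std <= 0.0 {
        return (0.0, 0.0);
    }

    // Accumulate third and fourth powers of the standardised deviations.
    let (mut m3, mut m4) = (0.0, 0.0);
    for &x in data {
        let z = (x - mean) / std;
        m3 += z.powi(3);
        m4 += z.powi(4);
    }

    let skewness = m3 / n;
    let kurtosis = m4 / n - 3.0; // excess kurtosis relative to a normal distribution
    (skewness, kurtosis)
}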

// Helper functions

#[inline]
fn _simd_find_knot_span(x: f64, knots: &[f64], degree: usize) -> usize {
    // Binary search for knot span
    let n = knots.len() - degree - 1;
    if x >= knots[n] {
        return n - 1;
    }
    if x <= knots[degree] {
        return degree;
    }

    let mut low = degree;
    let mut high = n;
    let mut mid = (low + high) / 2;

    while x < knots[mid] || x >= knots[mid + 1] {
        if x < knots[mid] {
            high = mid;
        } else {
            low = mid;
        }
        mid = (low + high) / 2;
    }

    mid
}

#[inline]
fn _simd_solve_least_squares(
    matrix: &[f64],
    y_values: &[f64],
    n_rows: usize,
    n_cols: usize,
    coefficients: &mut [f64],
) {
    // Simplified least squares solver using normal equations: (A^T A) x = A^T b
    let mut ata = vec![0.0; n_cols * n_cols];
    let mut atb = vec![0.0; n_cols];

    // Compute A^T A and A^T b
    for i in 0..n_cols {
        for j in i..n_cols {
            let mut sum = 0.0;
            for k in 0..n_rows {
                sum += matrix[k * n_cols + i] * matrix[k * n_cols + j];
            }
            ata[i * n_cols + j] = sum;
            ata[j * n_cols + i] = sum; // Symmetric
        }

        let mut sum = 0.0;
        for k in 0..n_rows {
            sum += matrix[k * n_cols + i] * y_values[k];
        }
        atb[i] = sum;
    }

    // Simple diagonal solver (would use proper linear solver in production)
    for i in 0..n_cols {
        let diag = ata[i * n_cols + i];
        coefficients[i] = if diag.abs() > 1e-10 {
            atb[i] / diag
        } else {
            0.0
        };
    }
}
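
// A hedged sketch of the "proper linear solver" the comment above alludes to:
// Gaussian elimination with partial pivoting applied to the dense normal-equation
// system (A^T A) x = A^T b. In production this would typically delegate to a
// LAPACK-style routine via SciRS2; the sketch is illustrative and not called by
// `_simd_solve_least_squares`. `coefficients` is assumed to hold at least `n` entries.
fn _solve_dense_gaussian_sketch(ata: &mut [f64], atb: &mut [f64], n: usize, coefficients: &mut [f64]) {
    for col in 0..n {
        // Partial pivoting: pick the remaining row with the largest magnitude in this column.
        let mut pivot = col;
        for row in (col + 1)..n {
            if ata[row * n + col].abs() > ata[pivot * n + col].abs() {
                pivot = row;
            }
        }
        if ata[pivot * n + col].abs() < 1e-12 {
            continue; // (nearly) singular column; leave the corresponding unknown alone
        }
        if pivot != col {
            for k in 0..n {
                ata.swap(pivot * n + k, col * n + k);
            }
            atb.swap(pivot, col);
        }

        // Eliminate entries below the pivot.
        for row in (col + 1)..n {
            let factor = ata[row * n + col] / ata[col * n + col];
            for k in col..n {
                ata[row * n + k] -= factor * ata[col * n + k];
            }
            atb[row] -= factor * atb[col];
        }
    }

    // Back-substitution from the last unknown upwards.
    for i in (0..n).rev() {
        let mut sum = atb[i];
        for k in (i + 1)..n {
            sum -= ata[i * n + k] * coefficients[k];
        }
        let diag = ata[i * n + i];
        coefficients[i] = if diag.abs() > 1e-12 { sum / diag } else { 0.0 };
    }
}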

#[allow(non_snake_case)]
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_simd_polynomial_features() {
        let input = [1.0, 2.0, 3.0];
        let powers = vec![vec![0, 0, 0], vec![1, 0, 0], vec![0, 1, 0], vec![1, 1, 0]];
        let mut output = [0.0; 4];

        simd_polynomial_features(&input, &powers, &mut output);

        assert!((output[0] - 1.0).abs() < 1e-10); // 1
        assert!((output[1] - 1.0).abs() < 1e-10); // x1
        assert!((output[2] - 2.0).abs() < 1e-10); // x2
        assert!((output[3] - 2.0).abs() < 1e-10); // x1 * x2
    }

    #[test]
    fn test_simd_box_cox_transform() {
        let input = [1.0, 2.0, 3.0, 4.0, 5.0];
        let mut output = [0.0; 5];

        // Lambda = 0 should give natural logarithm
        simd_box_cox_transform(&input, 0.0, 1e-8, &mut output);

        for i in 0..input.len() {
            assert!((output[i] - input[i].ln()).abs() < 1e-6);
        }
    }

    #[test]
    fn test_simd_standardize() {
        let input = [1.0, 2.0, 3.0, 4.0, 5.0];
        let mut output = [0.0; 5];
        let mean = 3.0;
        let std = (2.5f64).sqrt();

        simd_standardize(&input, mean, std, &mut output);

        // Check that standardized data has approximately zero mean
        let result_mean: f64 = output.iter().sum::<f64>() / output.len() as f64;
        assert!(result_mean.abs() < 1e-10);
    }

    #[test]
    fn test_simd_statistical_moments() {
        let data = [1.0, 2.0, 3.0, 4.0, 5.0];
        let mut moments = [0.0; 4];

        simd_calculate_statistical_moments(&data, &mut moments);

        assert!((moments[0] - 3.0).abs() < 1e-10); // Mean should be 3.0
        assert!(moments[1] > 0.0); // Variance should be positive
    }

    #[test]
    fn test_simd_yeo_johnson_transform() {
        let input = [-1.0, 0.0, 1.0, 2.0];
        let mut output = [0.0; 4];
        let lambda = 1.0;

        simd_yeo_johnson_transform(&input, lambda, &mut output);

        // Should transform all values
        for &val in &output {
            assert!(val.is_finite());
        }
    }

    #[test]
    fn test_simd_feature_interactions() {
        let features = [1.0, 2.0, 3.0];
        let interactions = [(0, 1), (1, 2), (0, 2)];
        let mut output = [0.0; 3];

        simd_compute_feature_interactions(&features, &interactions, &mut output);

        assert!((output[0] - 2.0).abs() < 1e-10); // 1.0 * 2.0
        assert!((output[1] - 6.0).abs() < 1e-10); // 2.0 * 3.0
        assert!((output[2] - 3.0).abs() < 1e-10); // 1.0 * 3.0
    }

    #[test]
    fn test_simd_fast_pow() {
        assert!((simd_fast_pow(2.0, 0) - 1.0).abs() < 1e-10);
        assert!((simd_fast_pow(2.0, 1) - 2.0).abs() < 1e-10);
        assert!((simd_fast_pow(2.0, 2) - 4.0).abs() < 1e-10);
        assert!((simd_fast_pow(2.0, 3) - 8.0).abs() < 1e-10);
        assert!((simd_fast_pow(2.0, 8) - 256.0).abs() < 1e-10);
        assert!((simd_fast_pow(3.0, 10) - 59049.0).abs() < 1e-6); // Uses fallback powi
    }
}