// scirs2_transform/features_simd.rs
1//! SIMD-accelerated feature engineering operations
2//!
3//! This module provides SIMD-optimized implementations of feature engineering operations
4//! using the unified SIMD operations from scirs2-core.
5
6use scirs2_core::ndarray::{Array1, Array2, ArrayBase, ArrayView1, Data, Ix2};
7use scirs2_core::numeric::{Float, NumCast};
8use scirs2_core::simd_ops::SimdUnifiedOps;
9use scirs2_core::validation::{check_not_empty, check_positive};
10
11use crate::error::{Result, TransformError};
12
/// SIMD-accelerated polynomial feature generation
pub struct SimdPolynomialFeatures<F: Float + NumCast + SimdUnifiedOps> {
    // Maximum polynomial degree to expand to (validated to be >= 1 in `new`).
    degree: usize,
    // When true, a leading constant (bias) column of ones is emitted.
    include_bias: bool,
    // When true, only products of distinct features are generated
    // (no squares or higher powers of a single feature).
    interaction_only: bool,
    // Ties the otherwise-unused generic element type `F` to the struct.
    _phantom: std::marker::PhantomData<F>,
}
20
21impl<F: Float + NumCast + SimdUnifiedOps> SimdPolynomialFeatures<F> {
22    /// Creates a new SIMD-accelerated polynomial features generator
23    pub fn new(degree: usize, include_bias: bool, interactiononly: bool) -> Result<Self> {
24        if degree == 0 {
25            return Err(TransformError::InvalidInput(
26                "Degree must be at least 1".to_string(),
27            ));
28        }
29
30        Ok(SimdPolynomialFeatures {
31            degree,
32            include_bias,
33            interaction_only: interactiononly,
34            _phantom: std::marker::PhantomData,
35        })
36    }
37
38    /// Transforms input features to polynomial features using SIMD operations
39    pub fn transform<S>(&self, x: &ArrayBase<S, Ix2>) -> Result<Array2<F>>
40    where
41        S: Data<Elem = F>,
42    {
43        // Validate input using scirs2-core validation
44        check_not_empty(x, "x")?;
45
46        // Check finite values
47        for &val in x.iter() {
48            if !val.is_finite() {
49                return Err(crate::error::TransformError::DataValidationError(
50                    "Data contains non-finite values".to_string(),
51                ));
52            }
53        }
54
55        let n_samples = x.shape()[0];
56        let nfeatures = x.shape()[1];
57
58        if n_samples == 0 || nfeatures == 0 {
59            return Err(TransformError::InvalidInput("Empty input data".to_string()));
60        }
61
62        if nfeatures > 1000 {
63            return Err(TransformError::InvalidInput(
64                "Too many features for polynomial expansion (>1000)".to_string(),
65            ));
66        }
67
68        // Calculate output dimensions with overflow check
69        let n_outputfeatures = self.calculate_n_outputfeatures(nfeatures)?;
70
71        // Check for memory constraints
72        if n_samples > 100_000 && n_outputfeatures > 10_000 {
73            return Err(TransformError::ComputationError(
74                "Output matrix would be too large (>1B elements)".to_string(),
75            ));
76        }
77
78        let mut output = Array2::zeros((n_samples, n_outputfeatures));
79
80        // Process samples in batches for better cache locality
81        let batch_size = self.calculate_optimal_batch_size(n_samples, n_outputfeatures);
82        for batch_start in (0..n_samples).step_by(batch_size) {
83            let batch_end = (batch_start + batch_size).min(n_samples);
84
85            for i in batch_start..batch_end {
86                let sample = x.row(i);
87                let poly_features = self.transform_sample_simd(&sample)?;
88
89                // Use SIMD copy if available
90                if poly_features.len() == n_outputfeatures {
91                    let mut output_row = output.row_mut(i);
92                    for (j, &val) in poly_features.iter().enumerate() {
93                        output_row[j] = val;
94                    }
95                } else {
96                    return Err(TransformError::ComputationError(
97                        "Feature count mismatch in polynomial expansion".to_string(),
98                    ));
99                }
100            }
101        }
102
103        Ok(output)
104    }
105
106    /// Transforms a single sample using SIMD operations
107    fn transform_sample_simd(&self, sample: &ArrayView1<F>) -> Result<Array1<F>> {
108        let nfeatures = sample.len();
109        let n_outputfeatures = self.calculate_n_outputfeatures(nfeatures)?;
110        let mut output = Array1::zeros(n_outputfeatures);
111        let mut output_idx = 0;
112
113        // Include bias term if requested
114        if self.include_bias {
115            output[output_idx] = F::one();
116            output_idx += 1;
117        }
118
119        // Copy original features
120        for j in 0..nfeatures {
121            output[output_idx] = sample[j];
122            output_idx += 1;
123        }
124
125        // Generate polynomial features
126        if self.degree > 1 {
127            if self.interaction_only {
128                // Only interaction terms (no powers of single features)
129                let _ = self.add_interaction_terms(sample, &mut output, output_idx, 2)?;
130            } else {
131                // All polynomial combinations
132                let _ = self.add_polynomial_terms(sample, &mut output, output_idx)?;
133            }
134        }
135
136        Ok(output)
137    }
138
139    /// Adds polynomial terms using SIMD operations where possible
140    fn add_polynomial_terms(
141        &self,
142        sample: &ArrayView1<F>,
143        output: &mut Array1<F>,
144        mut output_idx: usize,
145    ) -> Result<usize> {
146        let nfeatures = sample.len();
147
148        // For degree 2, use SIMD for efficient computation
149        if self.degree == 2 {
150            // Squared terms using SIMD
151            let squared = F::simd_mul(&sample.view(), &sample.view());
152            for j in 0..nfeatures {
153                output[output_idx] = squared[j];
154                output_idx += 1;
155            }
156
157            // Cross terms with vectorized operations where possible
158            for j in 0..nfeatures {
159                let remaining_features = nfeatures - j - 1;
160                if remaining_features > 0 {
161                    // Use SIMD for remaining cross products
162                    let sample_j = sample[j];
163                    let remaining_slice = sample.slice(scirs2_core::ndarray::s![j + 1..]);
164
165                    // Create a vector filled with sample[j]
166                    let sample_j_vec = Array1::from_elem(remaining_features, sample_j);
167                    let cross_products = F::simd_mul(&sample_j_vec.view(), &remaining_slice);
168
169                    for &val in cross_products.iter() {
170                        output[output_idx] = val;
171                        output_idx += 1;
172                    }
173                }
174            }
175        } else {
176            // For higher degrees, fall back to iterative computation
177            // but still use SIMD where beneficial
178            for current_degree in 2..=self.degree {
179                output_idx = self.add_degree_terms(sample, output, output_idx, current_degree)?;
180            }
181        }
182
183        Ok(output_idx)
184    }
185
186    /// Adds interaction terms only
187    fn add_interaction_terms(
188        &self,
189        sample: &ArrayView1<F>,
190        output: &mut Array1<F>,
191        mut output_idx: usize,
192        degree: usize,
193    ) -> Result<usize> {
194        let nfeatures = sample.len();
195
196        if degree == 2 {
197            // Pairwise interactions with SIMD optimization
198            for j in 0..nfeatures {
199                let remaining_features = nfeatures - j - 1;
200                if remaining_features > 0 {
201                    let sample_j = sample[j];
202                    let remaining_slice = sample.slice(scirs2_core::ndarray::s![j + 1..]);
203
204                    // Use SIMD for batch processing of interactions
205                    let sample_j_vec = Array1::from_elem(remaining_features, sample_j);
206                    let interactions = F::simd_mul(&sample_j_vec.view(), &remaining_slice);
207
208                    for &val in interactions.iter() {
209                        output[output_idx] = val;
210                        output_idx += 1;
211                    }
212                } else {
213                    // Fallback for remaining elements
214                    for k in j + 1..nfeatures {
215                        output[output_idx] = sample[j] * sample[k];
216                        output_idx += 1;
217                    }
218                }
219            }
220        } else {
221            // Higher-order interactions
222            let indices = self.generate_interaction_indices(nfeatures, degree);
223            for idx_set in indices {
224                let mut prod = F::one();
225                for &_idx in &idx_set {
226                    prod = prod * sample[_idx];
227                }
228                output[output_idx] = prod;
229                output_idx += 1;
230            }
231        }
232
233        Ok(output_idx)
234    }
235
236    /// Adds terms of a specific degree
237    fn add_degree_terms(
238        &self,
239        sample: &ArrayView1<F>,
240        output: &mut Array1<F>,
241        mut output_idx: usize,
242        degree: usize,
243    ) -> Result<usize> {
244        let nfeatures = sample.len();
245        let indices = self.generate_degree_indices(nfeatures, degree);
246
247        for idx_vec in indices {
248            let mut prod = F::one();
249            for &_idx in &idx_vec {
250                prod = prod * sample[_idx];
251            }
252            output[output_idx] = prod;
253            output_idx += 1;
254        }
255
256        Ok(output_idx)
257    }
258
259    /// Calculate optimal batch size based on memory characteristics
260    fn calculate_optimal_batch_size(&self, n_samples: usize, n_outputfeatures: usize) -> usize {
261        const L1_CACHE_SIZE: usize = 32_768;
262        let element_size = std::mem::size_of::<F>();
263
264        // Target: keep one batch worth of data in L1 cache
265        let elements_per_batch = L1_CACHE_SIZE / element_size / 2; // Conservative estimate
266        let max_batch_size = elements_per_batch / n_outputfeatures.max(1);
267
268        // Adaptive batch size based on data characteristics
269        let optimal_batch_size = if n_outputfeatures > 1000 {
270            // Large feature count: smaller batches
271            16.max(max_batch_size).min(64)
272        } else if n_samples > 50_000 {
273            // Large sample count: medium batches
274            64.max(max_batch_size).min(256)
275        } else {
276            // Standard case: larger batches for better vectorization
277            128.max(max_batch_size).min(512)
278        };
279
280        optimal_batch_size.min(n_samples)
281    }
282
283    /// Calculates the number of output features
284    fn calculate_n_outputfeatures(&self, nfeatures: usize) -> Result<usize> {
285        let mut count = if self.include_bias { 1 } else { 0 };
286        count += nfeatures; // Original _features
287
288        if self.degree > 1 {
289            if self.interaction_only {
290                // Only interaction terms
291                for d in 2..=self.degree {
292                    count += self.n_choose_k(nfeatures, d);
293                }
294            } else {
295                // All polynomial combinations
296                count += self.n_polynomial_features(nfeatures, self.degree) - nfeatures;
297                if self.include_bias {
298                    count -= 1;
299                }
300            }
301        }
302
303        Ok(count)
304    }
305
306    /// Calculates n choose k
307    fn n_choose_k(&self, n: usize, k: usize) -> usize {
308        if k > n {
309            return 0;
310        }
311        if k == 0 || k == n {
312            return 1;
313        }
314
315        let mut result = 1;
316        for i in 0..k {
317            result = result * (n - i) / (i + 1);
318        }
319        result
320    }
321
322    /// Calculates the number of polynomial features
323    fn n_polynomial_features(&self, nfeatures: usize, degree: usize) -> usize {
324        self.n_choose_k(nfeatures + degree, degree)
325    }
326
327    /// Generates indices for interaction terms
328    fn generate_interaction_indices(&self, nfeatures: usize, degree: usize) -> Vec<Vec<usize>> {
329        let mut indices = Vec::new();
330        let mut current = vec![0; degree];
331
332        loop {
333            // Add current combination
334            indices.push(current.clone());
335
336            // Find the rightmost element that can be incremented
337            let mut i = degree - 1;
338            loop {
339                current[i] += 1;
340                if current[i] < nfeatures - (degree - 1 - i) {
341                    // Reset all elements to the right
342                    for j in i + 1..degree {
343                        current[j] = current[j - 1] + 1;
344                    }
345                    break;
346                }
347                if i == 0 {
348                    return indices;
349                }
350                i -= 1;
351            }
352        }
353    }
354
355    /// Generates indices for polynomial terms of a specific degree
356    fn generate_degree_indices(&self, nfeatures: usize, degree: usize) -> Vec<Vec<usize>> {
357        let mut indices = Vec::new();
358        let mut current = vec![0; degree];
359
360        loop {
361            // Add current combination (with repetition allowed)
362            indices.push(current.clone());
363
364            // Find the rightmost element that can be incremented
365            let mut i = degree - 1;
366            loop {
367                current[i] += 1;
368                if current[i] < nfeatures {
369                    // Reset all elements to the right to the same value
370                    for j in i + 1..degree {
371                        current[j] = current[i];
372                    }
373                    break;
374                }
375                if i == 0 {
376                    return indices;
377                }
378                current[i] = 0;
379                i -= 1;
380            }
381        }
382    }
383}
384
385/// SIMD-accelerated power transformation (Box-Cox and Yeo-Johnson)
386#[allow(dead_code)]
387pub fn simd_power_transform<F>(data: &Array1<F>, lambda: F, method: &str) -> Result<Array1<F>>
388where
389    F: Float + NumCast + SimdUnifiedOps,
390{
391    let n = data.len();
392    let mut result = Array1::zeros(n);
393
394    match method {
395        "box-cox" => {
396            // Check for negative values
397            let min_val = F::simd_min_element(&data.view());
398            if min_val <= F::zero() {
399                return Err(TransformError::InvalidInput(
400                    "Box-Cox requires strictly positive values".to_string(),
401                ));
402            }
403
404            if lambda.abs() < F::from(1e-6).expect("Failed to convert constant to float") {
405                // lambda ≈ 0: log transform
406                for i in 0..n {
407                    result[i] = data[i].ln();
408                }
409            } else {
410                // General Box-Cox: (x^lambda - 1) / lambda
411                let ones = Array1::from_elem(n, F::one());
412                let powered = simd_array_pow(data, lambda)?;
413                let numerator = F::simd_sub(&powered.view(), &ones.view());
414                let lambda_array = Array1::from_elem(n, lambda);
415                result = F::simd_div(&numerator.view(), &lambda_array.view());
416            }
417        }
418        "yeo-johnson" => {
419            // Yeo-Johnson handles both positive and negative values
420            for i in 0..n {
421                let x = data[i];
422                if x >= F::zero() {
423                    if lambda.abs() < F::from(1e-6).expect("Failed to convert constant to float") {
424                        result[i] = x.ln() + F::one();
425                    } else {
426                        result[i] = ((x + F::one()).powf(lambda) - F::one()) / lambda;
427                    }
428                } else {
429                    if (F::from(2.0).expect("Failed to convert constant to float") - lambda).abs()
430                        < F::from(1e-6).expect("Failed to convert constant to float")
431                    {
432                        result[i] = -((-x + F::one()).ln());
433                    } else {
434                        result[i] = -((-x + F::one()).powf(
435                            F::from(2.0).expect("Failed to convert constant to float") - lambda,
436                        ) - F::one())
437                            / (F::from(2.0).expect("Failed to convert constant to float") - lambda);
438                    }
439                }
440            }
441        }
442        _ => {
443            return Err(TransformError::InvalidInput(
444                "Method must be 'box-cox' or 'yeo-johnson'".to_string(),
445            ));
446        }
447    }
448
449    Ok(result)
450}
451
452/// Helper function to compute element-wise power using SIMD where possible
453#[allow(dead_code)]
454fn simd_array_pow<F>(data: &Array1<F>, exponent: F) -> Result<Array1<F>>
455where
456    F: Float + NumCast + SimdUnifiedOps,
457{
458    let n = data.len();
459
460    if n == 0 {
461        return Ok(Array1::zeros(0));
462    }
463
464    if !exponent.is_finite() {
465        return Err(TransformError::InvalidInput(
466            "Exponent must be finite".to_string(),
467        ));
468    }
469
470    let mut result = Array1::zeros(n);
471
472    // For common exponents, use optimized SIMD operations
473    if (exponent - F::from(2.0).expect("Failed to convert constant to float")).abs()
474        < F::from(1e-10).expect("Failed to convert constant to float")
475    {
476        // Square using SIMD multiplication
477        result = F::simd_mul(&data.view(), &data.view());
478    } else if (exponent - F::from(0.5).expect("Failed to convert constant to float")).abs()
479        < F::from(1e-10).expect("Failed to convert constant to float")
480    {
481        // Square root using SIMD - check for non-negative values first
482        for &val in data.iter() {
483            if val < F::zero() {
484                return Err(TransformError::ComputationError(
485                    "Cannot compute square root of negative values".to_string(),
486                ));
487            }
488        }
489        result = F::simd_sqrt(&data.view());
490    } else if (exponent - F::from(3.0).expect("Failed to convert constant to float")).abs()
491        < F::from(1e-10).expect("Failed to convert constant to float")
492    {
493        // Cube: x^3 = x * x * x
494        let squared = F::simd_mul(&data.view(), &data.view());
495        result = F::simd_mul(&squared.view(), &data.view());
496    } else if (exponent - F::from(1.0).expect("Failed to convert constant to float")).abs()
497        < F::from(1e-10).expect("Failed to convert constant to float")
498    {
499        // Identity: x^1 = x
500        result = data.clone();
501    } else if (exponent - F::from(0.0).expect("Failed to convert constant to float")).abs()
502        < F::from(1e-10).expect("Failed to convert constant to float")
503    {
504        // Constant: x^0 = 1
505        result.fill(F::one());
506    } else {
507        // General case: use vectorized exponentiation
508        let exponent_array = Array1::from_elem(n, exponent);
509        // Fallback: element-wise power operation since simd_pow is not available
510        result = data.mapv(|x| x.powf(exponent));
511
512        // Validate results
513        for &val in result.iter() {
514            if !val.is_finite() {
515                return Err(TransformError::ComputationError(
516                    "Power operation produced non-finite values".to_string(),
517                ));
518            }
519        }
520    }
521
522    Ok(result)
523}
524
525/// SIMD-accelerated binarization with validation
526#[allow(dead_code)]
527pub fn simd_binarize<F>(data: &Array2<F>, threshold: F) -> Result<Array2<F>>
528where
529    F: Float + NumCast + SimdUnifiedOps,
530{
531    check_not_empty(data, "data")?;
532
533    // Check finite values
534    for &val in data.iter() {
535        if !val.is_finite() {
536            return Err(crate::error::TransformError::DataValidationError(
537                "Data contains non-finite values".to_string(),
538            ));
539        }
540    }
541
542    if !threshold.is_finite() {
543        return Err(TransformError::InvalidInput(
544            "Threshold must be finite".to_string(),
545        ));
546    }
547
548    let shape = data.shape();
549    let mut result = Array2::zeros((shape[0], shape[1]));
550
551    // Calculate adaptive chunk size based on data dimensions
552    let chunk_size = calculate_adaptive_chunk_size(shape[0], shape[1]);
553
554    for i in 0..shape[0] {
555        let row = data.row(i);
556        let row_array = row.to_owned();
557
558        // Process row in chunks using SIMD
559        for chunk_start in (0..shape[1]).step_by(chunk_size) {
560            let chunk_end = (chunk_start + chunk_size).min(shape[1]);
561            let chunk_size = chunk_end - chunk_start;
562
563            let chunk_slice = row_array.slice(scirs2_core::ndarray::s![chunk_start..chunk_end]);
564            let threshold_array = Array1::from_elem(chunk_size, threshold);
565
566            // Use SIMD comparison where available
567            // Fallback: element-wise comparison since simd_greater_than is not available
568            let comparison_result =
569                chunk_slice.mapv(|x| if x > threshold { F::one() } else { F::zero() });
570
571            for (j, &cmp_result) in comparison_result.iter().enumerate() {
572                result[[i, chunk_start + j]] = if cmp_result > F::zero() {
573                    F::one()
574                } else {
575                    F::zero()
576                };
577            }
578        }
579    }
580
581    Ok(result)
582}
583
/// Calculate adaptive chunk size for optimal SIMD performance.
///
/// Picks a base chunk width from the matrix shape, then clamps it to the row
/// width with a floor of 16 lanes.
#[allow(dead_code)]
fn calculate_adaptive_chunk_size(n_rows: usize, ncols: usize) -> usize {
    // Assumed L1 data cache size and a conservative per-element size.
    const L1_CACHE_SIZE: usize = 32_768;
    const F64_SIZE: usize = 8;

    // Portion of L1 we are willing to occupy (quartered for safety margin).
    let cache_elements = L1_CACHE_SIZE / F64_SIZE / 4;

    // Base chunk width chosen from the matrix shape.
    let base = if ncols > cache_elements {
        32 // wide rows: small chunks
    } else if n_rows > 10_000 {
        128 // many rows: medium chunks for better cache reuse
    } else {
        256 // standard case: large chunks for better vectorization
    };

    // Clamp to the row width, never dropping below a 16-lane minimum.
    // NOTE(review): for very narrow matrices (ncols < 16) this returns 16,
    // i.e. a chunk wider than the row; callers bound chunk ends with `.min()`.
    base.min(ncols).max(16)
}
608
609/// Advanced SIMD polynomial features with memory optimization
610#[allow(dead_code)]
611pub fn simd_polynomial_features_optimized<F>(
612    data: &Array2<F>,
613    degree: usize,
614    include_bias: bool,
615    interaction_only: bool,
616    memory_limit_mb: usize,
617) -> Result<Array2<F>>
618where
619    F: Float + NumCast + SimdUnifiedOps,
620{
621    check_not_empty(data, "data")?;
622
623    // Check finite values
624    for &val in data.iter() {
625        if !val.is_finite() {
626            return Err(crate::error::TransformError::DataValidationError(
627                "Data contains non-finite values".to_string(),
628            ));
629        }
630    }
631
632    check_positive(degree, "degree")?;
633
634    let poly_features = SimdPolynomialFeatures::new(degree, include_bias, interaction_only)?;
635
636    let shape = data.shape();
637    let element_size = std::mem::size_of::<F>();
638    let data_size_mb = (shape[0] * shape[1] * element_size) / (1024 * 1024);
639
640    if data_size_mb > memory_limit_mb {
641        // Use chunked processing for large datasets
642        simd_polynomial_features_chunked(data, &poly_features, memory_limit_mb)
643    } else {
644        // Standard processing
645        poly_features.transform(data)
646    }
647}
648
649/// Chunked SIMD polynomial features for large datasets
650#[allow(dead_code)]
651fn simd_polynomial_features_chunked<F>(
652    data: &Array2<F>,
653    poly_features: &SimdPolynomialFeatures<F>,
654    memory_limit_mb: usize,
655) -> Result<Array2<F>>
656where
657    F: Float + NumCast + SimdUnifiedOps,
658{
659    let shape = data.shape();
660    let element_size = std::mem::size_of::<F>();
661    let max_rows_per_chunk = (memory_limit_mb * 1024 * 1024) / (shape[1] * element_size * 2); // Factor of 2 for safety
662
663    if max_rows_per_chunk == 0 {
664        return Err(TransformError::MemoryError(
665            "Memory limit too small for processing".to_string(),
666        ));
667    }
668
669    // Process first chunk to determine output dimensions
670    let first_chunk_size = max_rows_per_chunk.min(shape[0]);
671    let first_chunk = data.slice(scirs2_core::ndarray::s![0..first_chunk_size, ..]);
672    let first_result = poly_features.transform(&first_chunk)?;
673    let n_outputfeatures = first_result.shape()[1];
674
675    // Initialize full output matrix
676    let mut output = Array2::zeros((shape[0], n_outputfeatures));
677
678    // Copy first chunk result
679    for i in 0..first_chunk_size {
680        for j in 0..n_outputfeatures {
681            output[[i, j]] = first_result[[i, j]];
682        }
683    }
684
685    // Process remaining chunks
686    for chunk_start in (first_chunk_size..shape[0]).step_by(max_rows_per_chunk) {
687        let chunk_end = (chunk_start + max_rows_per_chunk).min(shape[0]);
688        let chunk = data.slice(scirs2_core::ndarray::s![chunk_start..chunk_end, ..]);
689        let chunk_result = poly_features.transform(&chunk)?;
690
691        for (i_local, i_global) in (chunk_start..chunk_end).enumerate() {
692            for j in 0..n_outputfeatures {
693                output[[i_global, j]] = chunk_result[[i_local, j]];
694            }
695        }
696    }
697
698    Ok(output)
699}