sklears_preprocessing/feature_engineering/
sparse_polynomial_features.rs

1//! Sparse polynomial features for memory-efficient high-dimensional feature generation
2
3use scirs2_core::ndarray::{Array1, Array2};
4use sklears_core::{
5    error::{Result, SklearsError},
6    traits::{Fit, Trained, Transform, Untrained},
7    types::Float,
8};
9use std::marker::PhantomData;
10
/// Sparse representation of a single polynomial feature term.
///
/// A term is the product `coefficient * x[i0]^p0 * x[i1]^p1 * …`, where
/// `feature_indices` and `powers` are parallel vectors of equal length
/// (enforced by [`SparseCoefficient::new`]).
#[derive(Debug, Clone)]
pub struct SparseCoefficient {
    /// Feature indices involved in this term (parallel to `powers`)
    pub feature_indices: Vec<usize>,
    /// Powers of each feature in this term (parallel to `feature_indices`)
    pub powers: Vec<usize>,
    /// The coefficient value (typically 1.0 for polynomial features)
    pub coefficient: Float,
}
21
22impl SparseCoefficient {
23    /// Create a new sparse coefficient
24    pub fn new(feature_indices: Vec<usize>, powers: Vec<usize>, coefficient: Float) -> Self {
25        assert_eq!(feature_indices.len(), powers.len());
26        Self {
27            feature_indices,
28            powers,
29            coefficient,
30        }
31    }
32
33    /// Evaluate this term for a given input sample
34    pub fn evaluate(&self, sample: &Array1<Float>) -> Float {
35        let mut result = self.coefficient;
36        for (&feature_idx, &power) in self.feature_indices.iter().zip(self.powers.iter()) {
37            if power > 0 && feature_idx < sample.len() {
38                result *= sample[feature_idx].powi(power as i32);
39            }
40        }
41        result
42    }
43
44    /// Get the total degree of this term
45    pub fn total_degree(&self) -> usize {
46        self.powers.iter().sum()
47    }
48
49    /// Check if this is an interaction term (all powers <= 1)
50    pub fn is_interaction(&self) -> bool {
51        self.powers.iter().all(|&p| p <= 1)
52    }
53}
54
/// Configuration for [`SparsePolynomialFeatures`].
#[derive(Debug, Clone)]
pub struct SparsePolynomialFeaturesConfig {
    /// The degree of the polynomial features
    pub degree: usize,
    /// Whether to include interaction terms only (each feature appears at most once per term)
    pub interaction_only: bool,
    /// Whether to include bias (constant) term
    pub include_bias: bool,
    /// Maximum number of terms to generate (memory limit)
    pub max_terms: Option<usize>,
    /// Minimum coefficient magnitude to keep (sparsity threshold)
    // NOTE(review): this value is stored and settable but nothing in this
    // module currently reads it during term generation — confirm intended use.
    pub min_coefficient: Float,
    /// Whether to sort terms by total degree first, then lexicographically
    pub sort_terms: bool,
}
71
72impl Default for SparsePolynomialFeaturesConfig {
73    fn default() -> Self {
74        Self {
75            degree: 2,
76            interaction_only: false,
77            include_bias: true,
78            max_terms: None,
79            min_coefficient: 1e-12,
80            sort_terms: true,
81        }
82    }
83}
84
/// SparsePolynomialFeatures generates sparse polynomial and interaction features
///
/// This is a memory-efficient version of PolynomialFeatures designed for high-dimensional data.
/// Instead of storing all possible polynomial combinations, it stores only the terms that
/// would actually be non-zero, using a sparse representation.
///
/// This is particularly useful when:
/// - The input dimensionality is high (hundreds or thousands of features)
/// - Many input features are zero or near-zero
/// - Memory usage is a concern
/// - You want to limit the total number of generated features
pub struct SparsePolynomialFeatures<State = Untrained> {
    /// Generation settings (degree, bias, interaction-only, caps, sorting).
    config: SparsePolynomialFeaturesConfig,
    /// Zero-sized marker carrying the `Untrained`/`Trained` type state.
    state: PhantomData<State>,
    // Fitted parameters — `None` until `fit` populates them on the
    // `Trained`-state value it returns.
    n_features_in_: Option<usize>,
    n_output_features_: Option<usize>,
    sparse_terms_: Option<Vec<SparseCoefficient>>,
}
104
105impl SparsePolynomialFeatures<Untrained> {
106    /// Create a new SparsePolynomialFeatures
107    pub fn new() -> Self {
108        Self {
109            config: SparsePolynomialFeaturesConfig::default(),
110            state: PhantomData,
111            n_features_in_: None,
112            n_output_features_: None,
113            sparse_terms_: None,
114        }
115    }
116
117    /// Set the degree of polynomial features
118    pub fn degree(mut self, degree: usize) -> Self {
119        self.config.degree = degree;
120        self
121    }
122
123    /// Set whether to include interaction terms only
124    pub fn interaction_only(mut self, interaction_only: bool) -> Self {
125        self.config.interaction_only = interaction_only;
126        self
127    }
128
129    /// Set whether to include bias term
130    pub fn include_bias(mut self, include_bias: bool) -> Self {
131        self.config.include_bias = include_bias;
132        self
133    }
134
135    /// Set maximum number of terms to generate
136    pub fn max_terms(mut self, max_terms: usize) -> Self {
137        self.config.max_terms = Some(max_terms);
138        self
139    }
140
141    /// Set minimum coefficient magnitude threshold
142    pub fn min_coefficient(mut self, min_coefficient: Float) -> Self {
143        self.config.min_coefficient = min_coefficient;
144        self
145    }
146
147    /// Set whether to sort terms
148    pub fn sort_terms(mut self, sort_terms: bool) -> Self {
149        self.config.sort_terms = sort_terms;
150        self
151    }
152
153    /// Generate sparse polynomial terms
154    fn generate_sparse_terms(&self, n_features: usize) -> Vec<SparseCoefficient> {
155        let mut terms = Vec::new();
156
157        // Add bias term if requested
158        if self.config.include_bias {
159            terms.push(SparseCoefficient::new(vec![], vec![], 1.0));
160        }
161
162        // Add linear terms (degree 1)
163        for feature_idx in 0..n_features {
164            terms.push(SparseCoefficient::new(vec![feature_idx], vec![1], 1.0));
165        }
166
167        // Generate higher-degree terms
168        for degree in 2..=self.config.degree {
169            self.generate_degree_terms(n_features, degree, &mut terms);
170
171            // Check if we've exceeded the maximum number of terms
172            if let Some(max_terms) = self.config.max_terms {
173                if terms.len() >= max_terms {
174                    terms.truncate(max_terms);
175                    break;
176                }
177            }
178        }
179
180        // Sort terms if requested
181        if self.config.sort_terms {
182            terms.sort_by(|a, b| {
183                // Sort by total degree first, then lexicographically by feature indices
184                match a.total_degree().cmp(&b.total_degree()) {
185                    std::cmp::Ordering::Equal => a.feature_indices.cmp(&b.feature_indices),
186                    other => other,
187                }
188            });
189        }
190
191        terms
192    }
193
194    /// Generate all polynomial terms of a specific degree
195    fn generate_degree_terms(
196        &self,
197        n_features: usize,
198        degree: usize,
199        terms: &mut Vec<SparseCoefficient>,
200    ) {
201        if self.config.interaction_only && degree > 1 {
202            // For interaction only, generate all combinations of 'degree' different features
203            self.generate_interaction_combinations(n_features, degree, terms);
204        } else {
205            // Generate all possible polynomial terms of the given degree
206            self.generate_polynomial_combinations(n_features, degree, terms);
207        }
208    }
209
210    /// Generate interaction combinations (each feature appears at most once)
211    fn generate_interaction_combinations(
212        &self,
213        n_features: usize,
214        degree: usize,
215        terms: &mut Vec<SparseCoefficient>,
216    ) {
217        let mut combination = vec![0; degree];
218        self.generate_combinations_recursive(n_features, degree, 0, 0, &mut combination, terms);
219    }
220
221    /// Generate polynomial combinations (features can appear multiple times)
222    fn generate_polynomial_combinations(
223        &self,
224        n_features: usize,
225        degree: usize,
226        terms: &mut Vec<SparseCoefficient>,
227    ) {
228        let mut powers = vec![0; n_features];
229        self.generate_polynomial_recursive(n_features, degree, 0, &mut powers, terms);
230    }
231
232    /// Recursive helper for generating combinations
233    fn generate_combinations_recursive(
234        &self,
235        n_features: usize,
236        degree: usize,
237        start: usize,
238        pos: usize,
239        combination: &mut Vec<usize>,
240        terms: &mut Vec<SparseCoefficient>,
241    ) {
242        if pos == degree {
243            // Create a sparse coefficient for this combination
244            let feature_indices = combination.clone();
245            let powers = vec![1; degree];
246            terms.push(SparseCoefficient::new(feature_indices, powers, 1.0));
247            return;
248        }
249
250        for i in start..n_features {
251            combination[pos] = i;
252            self.generate_combinations_recursive(
253                n_features,
254                degree,
255                i + 1,
256                pos + 1,
257                combination,
258                terms,
259            );
260        }
261    }
262
263    /// Recursive helper for generating polynomial terms
264    fn generate_polynomial_recursive(
265        &self,
266        n_features: usize,
267        remaining_degree: usize,
268        feature_idx: usize,
269        powers: &mut Vec<usize>,
270        terms: &mut Vec<SparseCoefficient>,
271    ) {
272        if remaining_degree == 0 {
273            // Create sparse coefficient from current powers
274            let mut feature_indices = Vec::new();
275            let mut term_powers = Vec::new();
276
277            for (idx, &power) in powers.iter().enumerate() {
278                if power > 0 {
279                    feature_indices.push(idx);
280                    term_powers.push(power);
281                }
282            }
283
284            if !feature_indices.is_empty() {
285                terms.push(SparseCoefficient::new(feature_indices, term_powers, 1.0));
286            }
287            return;
288        }
289
290        if feature_idx >= n_features {
291            return;
292        }
293
294        // Try all possible powers for current feature
295        for power in 0..=remaining_degree {
296            powers[feature_idx] = power;
297            self.generate_polynomial_recursive(
298                n_features,
299                remaining_degree - power,
300                feature_idx + 1,
301                powers,
302                terms,
303            );
304        }
305        powers[feature_idx] = 0; // Reset for backtracking
306    }
307}
308
309impl SparsePolynomialFeatures<Trained> {
310    /// Get the number of input features
311    pub fn n_features_in(&self) -> usize {
312        self.n_features_in_.expect("Not fitted")
313    }
314
315    /// Get the number of output features
316    pub fn n_output_features(&self) -> usize {
317        self.n_output_features_.expect("Not fitted")
318    }
319
320    /// Get the sparse terms
321    pub fn sparse_terms(&self) -> &[SparseCoefficient] {
322        self.sparse_terms_.as_ref().expect("Not fitted")
323    }
324
325    /// Get memory usage information
326    pub fn memory_info(&self) -> SparseMemoryInfo {
327        let sparse_terms = self.sparse_terms();
328        let n_terms = sparse_terms.len();
329        let avg_features_per_term = if n_terms > 0 {
330            sparse_terms
331                .iter()
332                .map(|term| term.feature_indices.len())
333                .sum::<usize>() as f64
334                / n_terms as f64
335        } else {
336            0.0
337        };
338
339        let n_features = self.n_features_in();
340        let theoretical_dense_features = if self.config.include_bias {
341            // Binomial coefficient: C(n + d, d) for polynomial features with bias
342            self.binomial_coefficient(n_features + self.config.degree, self.config.degree)
343        } else {
344            self.binomial_coefficient(n_features + self.config.degree, self.config.degree) - 1
345        };
346
347        let sparsity_ratio = n_terms as f64 / theoretical_dense_features as f64;
348
349        SparseMemoryInfo {
350            n_sparse_terms: n_terms,
351            theoretical_dense_features,
352            sparsity_ratio,
353            avg_features_per_term,
354            memory_reduction_factor: theoretical_dense_features as f64 / n_terms as f64,
355        }
356    }
357
358    /// Calculate binomial coefficient C(n, k)
359    fn binomial_coefficient(&self, n: usize, k: usize) -> usize {
360        if k > n {
361            return 0;
362        }
363        if k == 0 || k == n {
364            return 1;
365        }
366
367        let k = if k > n - k { n - k } else { k }; // Take advantage of symmetry
368
369        let mut result = 1;
370        for i in 0..k {
371            result = result * (n - i) / (i + 1);
372        }
373        result
374    }
375}
376
impl Default for SparsePolynomialFeatures<Untrained> {
    /// Equivalent to [`SparsePolynomialFeatures::new`]: an untrained
    /// transformer with the default configuration.
    fn default() -> Self {
        Self::new()
    }
}
382
383impl Fit<Array2<Float>, ()> for SparsePolynomialFeatures<Untrained> {
384    type Fitted = SparsePolynomialFeatures<Trained>;
385
386    fn fit(self, x: &Array2<Float>, _y: &()) -> Result<Self::Fitted> {
387        let (n_samples, n_features) = x.dim();
388
389        if n_samples == 0 {
390            return Err(SklearsError::InvalidParameter {
391                name: "n_samples".to_string(),
392                reason: "Cannot fit on empty dataset".to_string(),
393            });
394        }
395
396        if self.config.degree == 0 {
397            return Err(SklearsError::InvalidParameter {
398                name: "degree".to_string(),
399                reason: "Degree must be positive".to_string(),
400            });
401        }
402
403        // Generate sparse terms
404        let sparse_terms = self.generate_sparse_terms(n_features);
405        let n_output_features = sparse_terms.len();
406
407        Ok(SparsePolynomialFeatures {
408            config: self.config,
409            state: PhantomData,
410            n_features_in_: Some(n_features),
411            n_output_features_: Some(n_output_features),
412            sparse_terms_: Some(sparse_terms),
413        })
414    }
415}
416
417impl Transform<Array2<Float>, Array2<Float>> for SparsePolynomialFeatures<Trained> {
418    fn transform(&self, x: &Array2<Float>) -> Result<Array2<Float>> {
419        let (n_samples, n_features) = x.dim();
420
421        if n_features != self.n_features_in() {
422            return Err(SklearsError::FeatureMismatch {
423                expected: self.n_features_in(),
424                actual: n_features,
425            });
426        }
427
428        let sparse_terms = self.sparse_terms();
429        let n_output_features = sparse_terms.len();
430
431        let mut output = Array2::zeros((n_samples, n_output_features));
432
433        for (sample_idx, sample) in x.rows().into_iter().enumerate() {
434            let sample_array = sample.to_owned();
435            for (term_idx, term) in sparse_terms.iter().enumerate() {
436                output[[sample_idx, term_idx]] = term.evaluate(&sample_array);
437            }
438        }
439
440        Ok(output)
441    }
442}
443
/// Memory usage information for sparse polynomial features, as reported by
/// `SparsePolynomialFeatures::memory_info`.
#[derive(Debug, Clone)]
pub struct SparseMemoryInfo {
    /// Number of sparse terms actually generated
    pub n_sparse_terms: usize,
    /// Number of features a dense implementation would generate
    pub theoretical_dense_features: usize,
    /// Ratio of sparse to dense features (sparsity); values near 0 mean a
    /// much smaller representation than the dense expansion
    pub sparsity_ratio: f64,
    /// Average number of features per sparse term
    pub avg_features_per_term: f64,
    /// Factor by which memory is reduced compared to dense
    pub memory_reduction_factor: f64,
}
458
#[allow(non_snake_case)]
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::array;

    #[test]
    fn test_sparse_coefficient() {
        // Term: 1.5 * x0^2 * x1^1
        let term = SparseCoefficient::new(vec![0, 1], vec![2, 1], 1.5);

        assert_eq!(term.total_degree(), 3);
        assert!(!term.is_interaction());

        // 1.5 * 2^2 * 3^1 = 1.5 * 4 * 3 = 18.0
        let input = array![2.0, 3.0];
        assert_eq!(term.evaluate(&input), 18.0);
    }

    #[test]
    fn test_sparse_polynomial_features_basic() {
        let x = array![[1.0, 2.0], [3.0, 4.0]];
        let fitted = SparsePolynomialFeatures::new()
            .degree(2)
            .include_bias(true)
            .fit(&x, &())
            .unwrap();

        assert_eq!(fitted.n_features_in(), 2);
        // At least: bias, x1, x2, x1^2, x1*x2, x2^2
        assert!(fitted.n_output_features() >= 5);

        let out = fitted.transform(&x).unwrap();
        assert_eq!(out.nrows(), 2);
        assert_eq!(out.ncols(), fitted.n_output_features());
    }

    #[test]
    fn test_interaction_only() {
        let x = array![[1.0, 2.0, 3.0]];
        let fitted = SparsePolynomialFeatures::new()
            .degree(2)
            .interaction_only(true)
            .include_bias(false)
            .fit(&x, &())
            .unwrap();

        // x1, x2, x3 plus the three pairwise products -> 6 features
        let out = fitted.transform(&x).unwrap();
        assert_eq!(out.ncols(), 6);
    }

    #[test]
    fn test_max_terms_limit() {
        let x = array![[1.0, 2.0, 3.0, 4.0, 5.0]];
        let fitted = SparsePolynomialFeatures::new()
            .degree(3)
            .max_terms(10)
            .fit(&x, &())
            .unwrap();

        // Generation stops once the cap of 10 terms is reached.
        assert_eq!(fitted.n_output_features(), 10);
    }

    #[test]
    fn test_memory_info() {
        let x = array![[1.0, 2.0, 3.0]];
        let fitted = SparsePolynomialFeatures::new()
            .degree(2)
            .max_terms(10)
            .fit(&x, &())
            .unwrap();

        let info = fitted.memory_info();
        assert_eq!(info.n_sparse_terms, fitted.n_output_features());
        assert!(info.sparsity_ratio <= 1.0);
        assert!(info.memory_reduction_factor >= 1.0);
    }
}