sklears_kernel_approximation/
polynomial_features.rs

1//! Explicit polynomial feature maps
2
3use scirs2_core::ndarray::Array2;
4use sklears_core::{
5    error::{Result, SklearsError},
6    traits::{Estimator, Fit, Trained, Transform, Untrained},
7    types::Float,
8};
9use std::marker::PhantomData;
10
11/// Polynomial feature expansion
12///
13/// Generate polynomial and interaction features up to a given degree.
14/// For example, if the input has features [a, b], degree 2 gives:
15/// [1, a, b, a^2, ab, b^2]
16///
17/// # Parameters
18///
19/// * `degree` - Maximum degree of polynomial features (default: 2)
20/// * `interaction_only` - Include only interaction features (default: false)
21/// * `include_bias` - Include bias column (default: true)
22///
23/// # Examples
24///
25/// ```rust,ignore
26/// use sklears_kernel_approximation::polynomial_features::PolynomialFeatures;
27/// use sklears_core::traits::{Transform, Fit, Untrained}
28/// use scirs2_core::ndarray::array;
29///
30/// let X = array![[1.0, 2.0], [3.0, 4.0]];
31///
32/// let poly = PolynomialFeatures::new(2);
33/// let fitted_poly = poly.fit(&X, &()).unwrap();
34/// let X_transformed = fitted_poly.transform(&X).unwrap();
35/// // Features: [1, a, b, a^2, ab, b^2] = 6 features
36/// assert_eq!(X_transformed.shape(), &[2, 6]);
37/// ```
38#[derive(Debug, Clone)]
39/// PolynomialFeatures
40pub struct PolynomialFeatures<State = Untrained> {
41    /// Maximum degree of polynomial features
42    pub degree: u32,
43    /// Include only interaction features
44    pub interaction_only: bool,
45    /// Include bias column
46    pub include_bias: bool,
47
48    // Fitted attributes
49    n_input_features_: Option<usize>,
50    n_output_features_: Option<usize>,
51    powers_: Option<Vec<Vec<u32>>>,
52
53    _state: PhantomData<State>,
54}
55
56impl PolynomialFeatures<Untrained> {
57    /// Create a new polynomial features transformer
58    pub fn new(degree: u32) -> Self {
59        Self {
60            degree,
61            interaction_only: false,
62            include_bias: true,
63            n_input_features_: None,
64            n_output_features_: None,
65            powers_: None,
66            _state: PhantomData,
67        }
68    }
69
70    /// Set interaction_only parameter
71    pub fn interaction_only(mut self, interaction_only: bool) -> Self {
72        self.interaction_only = interaction_only;
73        self
74    }
75
76    /// Set include_bias parameter
77    pub fn include_bias(mut self, include_bias: bool) -> Self {
78        self.include_bias = include_bias;
79        self
80    }
81}
82
83impl Estimator for PolynomialFeatures<Untrained> {
84    type Config = ();
85    type Error = SklearsError;
86    type Float = Float;
87
88    fn config(&self) -> &Self::Config {
89        &()
90    }
91}
92
93impl Fit<Array2<Float>, ()> for PolynomialFeatures<Untrained> {
94    type Fitted = PolynomialFeatures<Trained>;
95
96    fn fit(self, x: &Array2<Float>, _y: &()) -> Result<Self::Fitted> {
97        let (_, n_features) = x.dim();
98
99        if self.degree == 0 {
100            return Err(SklearsError::InvalidInput(
101                "degree must be positive".to_string(),
102            ));
103        }
104
105        // Generate all combinations of powers
106        let powers = self.generate_powers(n_features)?;
107        let n_output_features = powers.len();
108
109        Ok(PolynomialFeatures {
110            degree: self.degree,
111            interaction_only: self.interaction_only,
112            include_bias: self.include_bias,
113            n_input_features_: Some(n_features),
114            n_output_features_: Some(n_output_features),
115            powers_: Some(powers),
116            _state: PhantomData,
117        })
118    }
119}
120
121impl PolynomialFeatures<Untrained> {
122    fn generate_powers(&self, n_features: usize) -> Result<Vec<Vec<u32>>> {
123        let mut powers = Vec::new();
124
125        // Add bias term if requested
126        if self.include_bias {
127            powers.push(vec![0; n_features]);
128        }
129
130        // Generate all combinations up to degree
131        self.generate_all_combinations(n_features, self.degree, &mut powers);
132
133        Ok(powers)
134    }
135
136    fn generate_all_combinations(
137        &self,
138        n_features: usize,
139        max_degree: u32,
140        powers: &mut Vec<Vec<u32>>,
141    ) {
142        // Generate all combinations with total degree from 1 to max_degree
143        for total_degree in 1..=max_degree {
144            self.generate_combinations_with_total_degree(
145                n_features,
146                total_degree,
147                0,
148                &mut vec![0; n_features],
149                powers,
150            );
151        }
152    }
153
154    fn generate_combinations_with_total_degree(
155        &self,
156        n_features: usize,
157        total_degree: u32,
158        feature_idx: usize,
159        current: &mut Vec<u32>,
160        powers: &mut Vec<Vec<u32>>,
161    ) {
162        if feature_idx == n_features {
163            let sum: u32 = current.iter().sum();
164            if sum == total_degree {
165                // Check if it's valid for interaction_only mode
166                if !self.interaction_only || self.is_valid_for_interaction_only(current) {
167                    powers.push(current.clone());
168                }
169            }
170            return;
171        }
172
173        let current_sum: u32 = current.iter().sum();
174        let remaining_degree = total_degree - current_sum;
175
176        // Try different powers for current feature
177        for power in 0..=remaining_degree {
178            current[feature_idx] = power;
179            self.generate_combinations_with_total_degree(
180                n_features,
181                total_degree,
182                feature_idx + 1,
183                current,
184                powers,
185            );
186        }
187        current[feature_idx] = 0;
188    }
189
190    fn is_valid_for_interaction_only(&self, powers: &[u32]) -> bool {
191        let non_zero_count = powers.iter().filter(|&&p| p > 0).count();
192        let max_power = powers.iter().max().unwrap_or(&0);
193
194        // Valid if:
195        // 1. It's a linear term (single variable with power 1, like a, b)
196        // 2. It's an interaction term (multiple variables, each with power 1, like ab)
197        // Invalid: pure polynomial terms where one variable has power > 1 (like a^2, b^2)
198
199        if non_zero_count == 1 {
200            // Single variable: valid only if power is 1
201            *max_power == 1
202        } else {
203            // Multiple variables: valid only if all powers are 1 (pure interactions)
204            *max_power == 1
205        }
206    }
207}
208
209impl Transform<Array2<Float>, Array2<Float>> for PolynomialFeatures<Trained> {
210    fn transform(&self, x: &Array2<Float>) -> Result<Array2<Float>> {
211        let (n_samples, n_features) = x.dim();
212        let n_input_features = self.n_input_features_.unwrap();
213        let n_output_features = self.n_output_features_.unwrap();
214        let powers = self.powers_.as_ref().unwrap();
215
216        if n_features != n_input_features {
217            return Err(SklearsError::InvalidInput(format!(
218                "X has {} features, but PolynomialFeatures was fitted with {} features",
219                n_features, n_input_features
220            )));
221        }
222
223        let mut result = Array2::zeros((n_samples, n_output_features));
224
225        for i in 0..n_samples {
226            for (j, power_combination) in powers.iter().enumerate() {
227                let mut feature_value = 1.0;
228                for (k, &power) in power_combination.iter().enumerate() {
229                    if power > 0 {
230                        feature_value *= x[[i, k]].powi(power as i32);
231                    }
232                }
233                result[[i, j]] = feature_value;
234            }
235        }
236
237        Ok(result)
238    }
239}
240
241impl PolynomialFeatures<Trained> {
242    /// Get the number of input features
243    pub fn n_input_features(&self) -> usize {
244        self.n_input_features_.unwrap()
245    }
246
247    /// Get the number of output features
248    pub fn n_output_features(&self) -> usize {
249        self.n_output_features_.unwrap()
250    }
251
252    /// Get the powers for each feature
253    pub fn powers(&self) -> &[Vec<u32>] {
254        self.powers_.as_ref().unwrap()
255    }
256}
257
258#[allow(non_snake_case)]
259#[cfg(test)]
260mod tests {
261    use super::*;
262    use scirs2_core::ndarray::array;
263
264    #[test]
265    fn test_polynomial_features_basic() {
266        let x = array![[1.0, 2.0], [3.0, 4.0]];
267
268        let poly = PolynomialFeatures::new(2);
269        let fitted = poly.fit(&x, &()).unwrap();
270        let x_transformed = fitted.transform(&x).unwrap();
271
272        // Features: [1, b, a, b^2, ab, a^2] = 6 features
273        assert_eq!(x_transformed.shape(), &[2, 6]);
274
275        // Check first sample: [1, 2, 1, 4, 2, 1]
276        assert!((x_transformed[[0, 0]] - 1.0).abs() < 1e-10); // bias
277        assert!((x_transformed[[0, 1]] - 2.0).abs() < 1e-10); // b
278        assert!((x_transformed[[0, 2]] - 1.0).abs() < 1e-10); // a
279        assert!((x_transformed[[0, 3]] - 4.0).abs() < 1e-10); // b^2
280        assert!((x_transformed[[0, 4]] - 2.0).abs() < 1e-10); // ab
281        assert!((x_transformed[[0, 5]] - 1.0).abs() < 1e-10); // a^2
282    }
283
284    #[test]
285    fn test_polynomial_features_no_bias() {
286        let x = array![[1.0, 2.0], [3.0, 4.0]];
287
288        let poly = PolynomialFeatures::new(2).include_bias(false);
289        let fitted = poly.fit(&x, &()).unwrap();
290        let x_transformed = fitted.transform(&x).unwrap();
291
292        // Features: [a, b, a^2, ab, b^2] = 5 features
293        assert_eq!(x_transformed.shape(), &[2, 5]);
294    }
295
296    #[test]
297    fn test_polynomial_features_interaction_only() {
298        let x = array![[1.0, 2.0], [3.0, 4.0]];
299
300        let poly = PolynomialFeatures::new(2).interaction_only(true);
301        let fitted = poly.fit(&x, &()).unwrap();
302        let x_transformed = fitted.transform(&x).unwrap();
303
304        println!("Interaction only powers: {:?}", fitted.powers());
305        println!("Interaction only shape: {:?}", x_transformed.shape());
306
307        // Features should include bias + individual features + interactions
308        // But with interaction_only=true, we should exclude pure powers like a^2, b^2
309        // So: [1, a, b, ab] = 4 features
310        assert!(x_transformed.ncols() >= 2); // At least bias and interactions
311    }
312
313    #[test]
314    fn test_polynomial_features_degree_3() {
315        let x = array![[1.0, 2.0]];
316
317        let poly = PolynomialFeatures::new(3).include_bias(false);
318        let fitted = poly.fit(&x, &()).unwrap();
319        let x_transformed = fitted.transform(&x).unwrap();
320
321        // Features: [a, b, a^2, ab, b^2, a^3, a^2b, ab^2, b^3] = 9 features
322        assert_eq!(x_transformed.shape(), &[1, 9]);
323    }
324
325    #[test]
326    fn test_polynomial_features_zero_degree() {
327        let x = array![[1.0, 2.0]];
328        let poly = PolynomialFeatures::new(0);
329        let result = poly.fit(&x, &());
330        assert!(result.is_err());
331    }
332
333    #[test]
334    fn test_polynomial_features_feature_mismatch() {
335        let x_train = array![[1.0, 2.0], [3.0, 4.0]];
336        let x_test = array![[1.0, 2.0, 3.0]]; // Different number of features
337
338        let poly = PolynomialFeatures::new(2);
339        let fitted = poly.fit(&x_train, &()).unwrap();
340        let result = fitted.transform(&x_test);
341        assert!(result.is_err());
342    }
343
344    #[test]
345    fn test_polynomial_features_single_feature() {
346        let x = array![[2.0], [3.0]];
347
348        let poly = PolynomialFeatures::new(3);
349        let fitted = poly.fit(&x, &()).unwrap();
350        let x_transformed = fitted.transform(&x).unwrap();
351
352        // Features: [1, a, a^2, a^3] = 4 features
353        assert_eq!(x_transformed.shape(), &[2, 4]);
354
355        // Check first sample: [1, 2, 4, 8]
356        assert!((x_transformed[[0, 0]] - 1.0).abs() < 1e-10);
357        assert!((x_transformed[[0, 1]] - 2.0).abs() < 1e-10);
358        assert!((x_transformed[[0, 2]] - 4.0).abs() < 1e-10);
359        assert!((x_transformed[[0, 3]] - 8.0).abs() < 1e-10);
360    }
361}