Skip to main content

ferrolearn_preprocess/
polynomial_features.rs

1//! Polynomial features: generate polynomial and interaction features.
2//!
3//! Given input features `[a, b]` and degree 2, this transformer generates:
4//! - `[1, a, b, a², a·b, b²]` (default — `interaction_only = false`)
5//! - `[1, a, b, a·b]` (with `interaction_only = true`)
6//!
7//! With `include_bias = false`, the constant column `1` is omitted.
8//!
9//! This transformer is **stateless** — no fitting is needed. Call
10//! [`Transform::transform`] directly.
11
12use ferrolearn_core::error::FerroError;
13use ferrolearn_core::pipeline::{FittedPipelineTransformer, PipelineTransformer};
14use ferrolearn_core::traits::Transform;
15use ndarray::{Array1, Array2};
16use num_traits::Float;
17
18// ---------------------------------------------------------------------------
19// PolynomialFeatures
20// ---------------------------------------------------------------------------
21
22/// A stateless polynomial feature generator.
23///
24/// Generates all polynomial combinations of the input features up to the
25/// specified degree.
26///
27/// # Configuration
28///
29/// - `degree`: maximum polynomial degree (default `2`).
30/// - `interaction_only`: if `true`, only cross-product terms are generated
31///   (no pure powers like `a²`). Default `false`.
32/// - `include_bias`: if `true`, a constant column of ones is prepended.
33///   Default `true`.
34///
35/// # Examples
36///
37/// ```
38/// use ferrolearn_preprocess::polynomial_features::PolynomialFeatures;
39/// use ferrolearn_core::traits::Transform;
40/// use ndarray::array;
41///
42/// let poly = PolynomialFeatures::<f64>::new(2, false, true).unwrap();
43/// let x = array![[2.0, 3.0]];
44/// let out = poly.transform(&x).unwrap();
45/// // out = [[1, 2, 3, 4, 6, 9]]
46/// ```
47#[derive(Debug, Clone)]
48pub struct PolynomialFeatures<F> {
49    /// Maximum polynomial degree.
50    pub(crate) degree: usize,
51    /// If `true`, only interaction terms are produced (no pure powers).
52    pub(crate) interaction_only: bool,
53    /// If `true`, prepend a bias (constant ones) column.
54    pub(crate) include_bias: bool,
55    _marker: std::marker::PhantomData<F>,
56}
57
58impl<F: Float + Send + Sync + 'static> PolynomialFeatures<F> {
59    /// Create a new `PolynomialFeatures` transformer.
60    ///
61    /// # Errors
62    ///
63    /// Returns [`FerroError::InvalidParameter`] if `degree == 0`.
64    pub fn new(
65        degree: usize,
66        interaction_only: bool,
67        include_bias: bool,
68    ) -> Result<Self, FerroError> {
69        if degree == 0 {
70            return Err(FerroError::InvalidParameter {
71                name: "degree".into(),
72                reason: "degree must be at least 1".into(),
73            });
74        }
75        Ok(Self {
76            degree,
77            interaction_only,
78            include_bias,
79            _marker: std::marker::PhantomData,
80        })
81    }
82
83    /// Create a `PolynomialFeatures` with default settings:
84    /// degree=2, interaction_only=false, include_bias=true.
85    #[must_use]
86    pub fn default_config() -> Self {
87        Self {
88            degree: 2,
89            interaction_only: false,
90            include_bias: true,
91            _marker: std::marker::PhantomData,
92        }
93    }
94
95    /// Return the configured degree.
96    #[must_use]
97    pub fn degree(&self) -> usize {
98        self.degree
99    }
100
101    /// Return whether only interaction terms are generated.
102    #[must_use]
103    pub fn interaction_only(&self) -> bool {
104        self.interaction_only
105    }
106
107    /// Return whether a bias column is included.
108    #[must_use]
109    pub fn include_bias(&self) -> bool {
110        self.include_bias
111    }
112
113    /// Generate all combinations (with repetition unless `interaction_only`)
114    /// of feature indices up to `degree`.
115    ///
116    /// Returns a list of index-tuples, where each tuple specifies which
117    /// feature indices to multiply together to produce one output column.
118    fn feature_combinations(&self, n_features: usize) -> Vec<Vec<usize>> {
119        let mut combos: Vec<Vec<usize>> = Vec::new();
120
121        // Bias term: empty product = 1
122        if self.include_bias {
123            combos.push(vec![]);
124        }
125
126        // Generate combinations of degrees 1..=self.degree
127        let mut stack: Vec<(Vec<usize>, usize)> = Vec::new();
128
129        // Start with each feature at degree 1
130        for i in 0..n_features {
131            stack.push((vec![i], i));
132        }
133
134        while let Some((combo, last_idx)) = stack.pop() {
135            combos.push(combo.clone());
136
137            if combo.len() < self.degree {
138                // Extend with another feature
139                let start = if self.interaction_only {
140                    // Strictly increasing indices — no repeated features
141                    last_idx + 1
142                } else {
143                    // Non-decreasing indices — allows repeated features (pure powers)
144                    last_idx
145                };
146                for i in start..n_features {
147                    let mut new_combo = combo.clone();
148                    new_combo.push(i);
149                    stack.push((new_combo, i));
150                }
151            }
152        }
153
154        // Sort: bias first, then by combo length, then lexicographically
155        combos.sort_by(|a, b| a.len().cmp(&b.len()).then_with(|| a.cmp(b)));
156
157        combos
158    }
159}
160
161impl<F: Float + Send + Sync + 'static> Default for PolynomialFeatures<F> {
162    fn default() -> Self {
163        Self::default_config()
164    }
165}
166
167// ---------------------------------------------------------------------------
168// Trait implementations
169// ---------------------------------------------------------------------------
170
171impl<F: Float + Send + Sync + 'static> Transform<Array2<F>> for PolynomialFeatures<F> {
172    type Output = Array2<F>;
173    type Error = FerroError;
174
175    /// Generate polynomial and interaction features.
176    ///
177    /// # Errors
178    ///
179    /// Returns [`FerroError::InvalidParameter`] if the input has zero columns.
180    fn transform(&self, x: &Array2<F>) -> Result<Array2<F>, FerroError> {
181        let n_samples = x.nrows();
182        let n_features = x.ncols();
183
184        if n_features == 0 {
185            return Err(FerroError::InvalidParameter {
186                name: "x".into(),
187                reason: "input must have at least one column".into(),
188            });
189        }
190
191        let combos = self.feature_combinations(n_features);
192        let n_out = combos.len();
193
194        let mut out = Array2::zeros((n_samples, n_out));
195
196        for (k, combo) in combos.iter().enumerate() {
197            for i in 0..n_samples {
198                let val = combo.iter().fold(F::one(), |acc, &j| acc * x[[i, j]]);
199                out[[i, k]] = val;
200            }
201        }
202
203        Ok(out)
204    }
205}
206
207// ---------------------------------------------------------------------------
208// Pipeline integration (generic)
209// ---------------------------------------------------------------------------
210
211impl<F: Float + Send + Sync + 'static> PipelineTransformer<F> for PolynomialFeatures<F> {
212    /// Fit the polynomial features transformer using the pipeline interface.
213    ///
214    /// Because `PolynomialFeatures` is stateless, this simply boxes `self`
215    /// as a [`FittedPipelineTransformer`].
216    ///
217    /// # Errors
218    ///
219    /// This implementation never returns an error.
220    fn fit_pipeline(
221        &self,
222        _x: &Array2<F>,
223        _y: &Array1<F>,
224    ) -> Result<Box<dyn FittedPipelineTransformer<F>>, FerroError> {
225        Ok(Box::new(self.clone()))
226    }
227}
228
229impl<F: Float + Send + Sync + 'static> FittedPipelineTransformer<F> for PolynomialFeatures<F> {
230    /// Transform data using the pipeline interface.
231    ///
232    /// # Errors
233    ///
234    /// Propagates errors from [`Transform::transform`].
235    fn transform_pipeline(&self, x: &Array2<F>) -> Result<Array2<F>, FerroError> {
236        self.transform(x)
237    }
238}
239
240// ---------------------------------------------------------------------------
241// Tests
242// ---------------------------------------------------------------------------
243
244#[cfg(test)]
245mod tests {
246    use super::*;
247    use approx::assert_abs_diff_eq;
248    use ndarray::array;
249
250    #[test]
251    fn test_degree2_two_features_with_bias() {
252        // degree=2, interaction_only=false, include_bias=true
253        // Expected: [1, a, b, a², a·b, b²]
254        let poly = PolynomialFeatures::<f64>::new(2, false, true).unwrap();
255        let x = array![[2.0, 3.0]];
256        let out = poly.transform(&x).unwrap();
257        assert_eq!(out.shape()[0], 1);
258        assert_eq!(out.shape()[1], 6); // 1 + 2 + 3 combinations
259        assert_abs_diff_eq!(out[[0, 0]], 1.0, epsilon = 1e-10); // bias
260        assert_abs_diff_eq!(out[[0, 1]], 2.0, epsilon = 1e-10); // a
261        assert_abs_diff_eq!(out[[0, 2]], 3.0, epsilon = 1e-10); // b
262        assert_abs_diff_eq!(out[[0, 3]], 4.0, epsilon = 1e-10); // a²
263        assert_abs_diff_eq!(out[[0, 4]], 6.0, epsilon = 1e-10); // a·b
264        assert_abs_diff_eq!(out[[0, 5]], 9.0, epsilon = 1e-10); // b²
265    }
266
267    #[test]
268    fn test_degree2_interaction_only() {
269        // degree=2, interaction_only=true, include_bias=true
270        // Expected: [1, a, b, a·b]
271        let poly = PolynomialFeatures::<f64>::new(2, true, true).unwrap();
272        let x = array![[2.0, 3.0]];
273        let out = poly.transform(&x).unwrap();
274        assert_eq!(out.shape()[1], 4);
275        assert_abs_diff_eq!(out[[0, 0]], 1.0, epsilon = 1e-10); // bias
276        assert_abs_diff_eq!(out[[0, 1]], 2.0, epsilon = 1e-10); // a
277        assert_abs_diff_eq!(out[[0, 2]], 3.0, epsilon = 1e-10); // b
278        assert_abs_diff_eq!(out[[0, 3]], 6.0, epsilon = 1e-10); // a·b
279    }
280
281    #[test]
282    fn test_no_bias() {
283        // degree=2, interaction_only=false, include_bias=false
284        // Expected: [a, b, a², a·b, b²]
285        let poly = PolynomialFeatures::<f64>::new(2, false, false).unwrap();
286        let x = array![[2.0, 3.0]];
287        let out = poly.transform(&x).unwrap();
288        assert_eq!(out.shape()[1], 5);
289        assert_abs_diff_eq!(out[[0, 0]], 2.0, epsilon = 1e-10); // a
290    }
291
292    #[test]
293    fn test_degree1_only_linear() {
294        let poly = PolynomialFeatures::<f64>::new(1, false, true).unwrap();
295        let x = array![[2.0, 3.0]];
296        let out = poly.transform(&x).unwrap();
297        // [1, a, b]
298        assert_eq!(out.shape()[1], 3);
299        assert_abs_diff_eq!(out[[0, 0]], 1.0, epsilon = 1e-10);
300        assert_abs_diff_eq!(out[[0, 1]], 2.0, epsilon = 1e-10);
301        assert_abs_diff_eq!(out[[0, 2]], 3.0, epsilon = 1e-10);
302    }
303
304    #[test]
305    fn test_multiple_rows() {
306        let poly = PolynomialFeatures::<f64>::new(2, false, false).unwrap();
307        let x = array![[1.0, 2.0], [3.0, 4.0]];
308        let out = poly.transform(&x).unwrap();
309        assert_eq!(out.shape(), &[2, 5]);
310        // Row 0: a=1, b=2 → [1, 2, 1, 2, 4]
311        assert_abs_diff_eq!(out[[0, 0]], 1.0, epsilon = 1e-10);
312        assert_abs_diff_eq!(out[[0, 1]], 2.0, epsilon = 1e-10);
313        assert_abs_diff_eq!(out[[0, 2]], 1.0, epsilon = 1e-10);
314        assert_abs_diff_eq!(out[[0, 3]], 2.0, epsilon = 1e-10);
315        assert_abs_diff_eq!(out[[0, 4]], 4.0, epsilon = 1e-10);
316    }
317
318    #[test]
319    fn test_single_feature_degree2() {
320        // [a] → [1, a, a²]
321        let poly = PolynomialFeatures::<f64>::new(2, false, true).unwrap();
322        let x = array![[3.0]];
323        let out = poly.transform(&x).unwrap();
324        assert_eq!(out.shape()[1], 3);
325        assert_abs_diff_eq!(out[[0, 0]], 1.0, epsilon = 1e-10);
326        assert_abs_diff_eq!(out[[0, 1]], 3.0, epsilon = 1e-10);
327        assert_abs_diff_eq!(out[[0, 2]], 9.0, epsilon = 1e-10);
328    }
329
330    #[test]
331    fn test_invalid_degree_zero() {
332        assert!(PolynomialFeatures::<f64>::new(0, false, true).is_err());
333    }
334
335    #[test]
336    fn test_default_config() {
337        let poly = PolynomialFeatures::<f64>::default();
338        assert_eq!(poly.degree(), 2);
339        assert!(!poly.interaction_only());
340        assert!(poly.include_bias());
341    }
342
343    #[test]
344    fn test_pipeline_integration() {
345        use ferrolearn_core::pipeline::PipelineTransformer;
346        let poly = PolynomialFeatures::<f64>::new(2, false, true).unwrap();
347        let x = array![[1.0, 2.0], [3.0, 4.0]];
348        let y = Array1::zeros(2);
349        let fitted = poly.fit_pipeline(&x, &y).unwrap();
350        let result = fitted.transform_pipeline(&x).unwrap();
351        assert_eq!(result.shape(), &[2, 6]);
352    }
353
354    #[test]
355    fn test_degree3_single_feature() {
356        // [a] with degree=3, no bias → [a, a², a³]
357        let poly = PolynomialFeatures::<f64>::new(3, false, false).unwrap();
358        let x = array![[2.0]];
359        let out = poly.transform(&x).unwrap();
360        assert_eq!(out.shape()[1], 3);
361        assert_abs_diff_eq!(out[[0, 0]], 2.0, epsilon = 1e-10);
362        assert_abs_diff_eq!(out[[0, 1]], 4.0, epsilon = 1e-10);
363        assert_abs_diff_eq!(out[[0, 2]], 8.0, epsilon = 1e-10);
364    }
365}