1use scirs2_core::ndarray::Array2;
6use sklears_core::{
7 error::{Result, SklearsError},
8 traits::{Fit, Trained, Transform, Untrained},
9 types::Float,
10};
11use std::marker::PhantomData;
12
13#[derive(Debug, Clone)]
15pub struct PolynomialFeaturesConfig {
16 pub degree: usize,
18 pub interaction_only: bool,
20 pub include_bias: bool,
22 pub order: FeatureOrder,
24 pub interaction_depth: Option<usize>,
26 pub max_features: Option<usize>,
28 pub feature_selection: bool,
30 pub alpha: Float,
32}
33
34impl Default for PolynomialFeaturesConfig {
35 fn default() -> Self {
36 Self {
37 degree: 2,
38 interaction_only: false,
39 include_bias: true,
40 order: FeatureOrder::C,
41 interaction_depth: None,
42 max_features: None,
43 feature_selection: false,
44 alpha: 1.0,
45 }
46 }
47}
48
49#[derive(Debug, Clone, Copy, Default)]
51pub enum FeatureOrder {
52 #[default]
54 C,
55 F,
57}
58
59#[derive(Debug, Clone)]
66pub struct PolynomialFeatures<State = Untrained> {
67 config: PolynomialFeaturesConfig,
68 state: PhantomData<State>,
69 n_features_in_: Option<usize>,
71 n_output_features_: Option<usize>,
72 powers_: Option<Array2<usize>>,
73}
74
75impl PolynomialFeatures<Untrained> {
76 pub fn new() -> Self {
78 Self {
79 config: PolynomialFeaturesConfig::default(),
80 state: PhantomData,
81 n_features_in_: None,
82 n_output_features_: None,
83 powers_: None,
84 }
85 }
86
87 pub fn with_config(config: PolynomialFeaturesConfig) -> Self {
89 Self {
90 config,
91 state: PhantomData,
92 n_features_in_: None,
93 n_output_features_: None,
94 powers_: None,
95 }
96 }
97
98 pub fn degree(mut self, degree: usize) -> Self {
100 self.config.degree = degree;
101 self
102 }
103
104 pub fn interaction_only(mut self, interaction_only: bool) -> Self {
106 self.config.interaction_only = interaction_only;
107 self
108 }
109
110 pub fn include_bias(mut self, include_bias: bool) -> Self {
112 self.config.include_bias = include_bias;
113 self
114 }
115
116 pub fn interaction_depth(mut self, depth: Option<usize>) -> Self {
118 self.config.interaction_depth = depth;
119 self
120 }
121
122 pub fn max_features(mut self, max_features: Option<usize>) -> Self {
124 self.config.max_features = max_features;
125 self
126 }
127
128 pub fn with_feature_selection(mut self, alpha: Float) -> Self {
130 self.config.feature_selection = true;
131 self.config.alpha = alpha;
132 self
133 }
134
135 fn get_powers(&self, n_features: usize) -> Array2<usize> {
137 let mut powers = Vec::new();
138
139 if self.config.include_bias {
141 powers.push(vec![0; n_features]);
142 }
143
144 if self.config.interaction_only {
146 self.generate_interaction_powers(n_features, &mut powers);
148 } else {
149 self.generate_all_powers(n_features, &mut powers);
151 }
152
153 powers.sort();
155 powers.dedup();
156
157 self.sort_powers(&mut powers, n_features);
159
160 if self.config.feature_selection {
162 powers = self.select_features(powers);
163 }
164
165 if let Some(max_features) = self.config.max_features {
167 powers.truncate(max_features);
168 }
169
170 let n_output = powers.len();
171 let mut powers_array = Array2::zeros((n_output, n_features));
172
173 for (i, power_vec) in powers.iter().enumerate() {
174 for (j, &power) in power_vec.iter().enumerate() {
175 powers_array[[i, j]] = power;
176 }
177 }
178
179 powers_array
180 }
181
182 fn generate_all_powers(&self, n_features: usize, powers: &mut Vec<Vec<usize>>) {
184 fn generate_recursive(
185 current: Vec<usize>,
186 n_features: usize,
187 max_degree: usize,
188 powers: &mut Vec<Vec<usize>>,
189 interaction_depth: Option<usize>,
190 ) {
191 let current_degree: usize = current.iter().sum();
192 if current_degree > max_degree {
193 return;
194 }
195
196 if let Some(max_depth) = interaction_depth {
198 let non_zero_count = current.iter().filter(|&&x| x > 0).count();
199 if non_zero_count > max_depth {
200 return;
201 }
202 }
203
204 if current_degree > 0 {
205 powers.push(current.clone());
206 }
207
208 for i in 0..n_features {
209 let mut next = current.clone();
210 next[i] += 1;
211 if next.iter().sum::<usize>() <= max_degree {
212 generate_recursive(next, n_features, max_degree, powers, interaction_depth);
213 }
214 }
215 }
216
217 let initial = vec![0; n_features];
218 generate_recursive(
219 initial,
220 n_features,
221 self.config.degree,
222 powers,
223 self.config.interaction_depth,
224 );
225 }
226
227 fn generate_interaction_powers(&self, n_features: usize, powers: &mut Vec<Vec<usize>>) {
229 fn generate_interactions(
230 current: Vec<usize>,
231 start_idx: usize,
232 n_features: usize,
233 max_degree: usize,
234 powers: &mut Vec<Vec<usize>>,
235 interaction_depth: Option<usize>,
236 ) {
237 let current_degree: usize = current.iter().sum();
238
239 if current_degree > max_degree {
240 return;
241 }
242
243 if let Some(max_depth) = interaction_depth {
245 let non_zero_count = current.iter().filter(|&&x| x > 0).count();
246 if non_zero_count > max_depth {
247 return;
248 }
249 }
250
251 let non_zero_count = current.iter().filter(|&&x| x > 0).count();
253 if current_degree > 0 && non_zero_count >= 2 {
254 powers.push(current.clone());
255 }
256
257 for i in start_idx..n_features {
258 let mut next = current.clone();
259 next[i] += 1;
260 if next.iter().sum::<usize>() <= max_degree {
261 generate_interactions(
262 next,
263 i,
264 n_features,
265 max_degree,
266 powers,
267 interaction_depth,
268 );
269 }
270 }
271 }
272
273 let initial = vec![0; n_features];
274 generate_interactions(
275 initial,
276 0,
277 n_features,
278 self.config.degree,
279 powers,
280 self.config.interaction_depth,
281 );
282 }
283
284 fn select_features(&self, mut powers: Vec<Vec<usize>>) -> Vec<Vec<usize>> {
286 if self.config.alpha > 1.0 {
289 let keep_ratio = 1.0 / self.config.alpha;
290 let keep_count = ((powers.len() as f64) * keep_ratio).ceil() as usize;
291 powers.truncate(keep_count);
292 }
293 powers
294 }
295
296 fn sort_powers(&self, powers: &mut Vec<Vec<usize>>, _n_features: usize) {
298 powers.sort_by(|a, b| {
301 let degree_a: usize = a.iter().sum();
302 let degree_b: usize = b.iter().sum();
303
304 degree_a.cmp(°ree_b).then_with(|| {
306 b.cmp(a)
309 })
310 });
311 }
312}
313
314impl<State> PolynomialFeatures<State> {
316 pub fn get_n_output_features(
318 n_features: usize,
319 degree: usize,
320 include_bias: bool,
321 interaction_only: bool,
322 ) -> usize {
323 use std::collections::HashMap;
324
325 fn binomial_coefficient(n: usize, k: usize) -> usize {
326 if k > n {
327 return 0;
328 }
329 if k == 0 || k == n {
330 return 1;
331 }
332
333 let mut memo = HashMap::new();
334 fn binomial_memo(
335 n: usize,
336 k: usize,
337 memo: &mut HashMap<(usize, usize), usize>,
338 ) -> usize {
339 if let Some(&result) = memo.get(&(n, k)) {
340 return result;
341 }
342
343 let result = if k > n {
344 0
345 } else if k == 0 || k == n {
346 1
347 } else {
348 binomial_memo(n - 1, k - 1, memo) + binomial_memo(n - 1, k, memo)
349 };
350
351 memo.insert((n, k), result);
352 result
353 }
354
355 binomial_memo(n, k, &mut memo)
356 }
357
358 let mut count = 0;
359
360 if include_bias {
361 count += 1;
362 }
363
364 if interaction_only {
365 for d in 2..=degree {
367 count += binomial_coefficient(n_features, d);
368 }
369 } else {
370 for d in 1..=degree {
372 count += binomial_coefficient(n_features + d - 1, d);
373 }
374 }
375
376 count
377 }
378}
379
380impl PolynomialFeatures<Trained> {
381 pub fn n_output_features(&self) -> usize {
383 self.n_output_features_.unwrap_or(0)
384 }
385
386 pub fn powers(&self) -> Option<&Array2<usize>> {
388 self.powers_.as_ref()
389 }
390
391 pub fn get_feature_names(&self, input_features: Option<&[String]>) -> Vec<String> {
393 let powers = match &self.powers_ {
394 Some(p) => p,
395 None => return Vec::new(),
396 };
397
398 let n_features = self.n_features_in_.unwrap_or(0);
399 let feature_names = match input_features {
400 Some(names) => names.to_vec(),
401 None => (0..n_features).map(|i| format!("x{}", i)).collect(),
402 };
403
404 let mut output_names = Vec::new();
405
406 for i in 0..powers.nrows() {
407 let power_row = powers.row(i);
408 let mut name_parts = Vec::new();
409
410 for (j, &power) in power_row.iter().enumerate() {
411 if power == 0 {
412 continue;
413 } else if power == 1 {
414 name_parts.push(feature_names[j].clone());
415 } else {
416 name_parts.push(format!("{}^{}", feature_names[j], power));
417 }
418 }
419
420 let feature_name = if name_parts.is_empty() {
421 "1".to_string() } else {
423 name_parts.join(" ")
424 };
425
426 output_names.push(feature_name);
427 }
428
429 output_names
430 }
431}
432
433impl Default for PolynomialFeatures<Untrained> {
434 fn default() -> Self {
435 Self::new()
436 }
437}
438
439impl Fit<Array2<Float>, ()> for PolynomialFeatures<Untrained> {
440 type Fitted = PolynomialFeatures<Trained>;
441
442 fn fit(self, x: &Array2<Float>, _y: &()) -> Result<Self::Fitted> {
443 let n_features = x.ncols();
444
445 if n_features == 0 {
446 return Err(SklearsError::InvalidInput(
447 "Input must have at least one feature".to_string(),
448 ));
449 }
450
451 if self.config.degree == 0 {
452 return Err(SklearsError::InvalidInput(
453 "Degree must be at least 1".to_string(),
454 ));
455 }
456
457 let powers = self.get_powers(n_features);
458 let n_output_features = powers.nrows();
459
460 Ok(PolynomialFeatures::<Trained> {
461 config: self.config,
462 state: PhantomData,
463 n_features_in_: Some(n_features),
464 n_output_features_: Some(n_output_features),
465 powers_: Some(powers),
466 })
467 }
468}
469
470impl Transform<Array2<Float>, Array2<Float>> for PolynomialFeatures<Trained> {
471 fn transform(&self, x: &Array2<Float>) -> Result<Array2<Float>> {
472 let powers = self
473 .powers_
474 .as_ref()
475 .ok_or_else(|| SklearsError::NotFitted {
476 operation: "transform".to_string(),
477 })?;
478
479 let n_samples = x.nrows();
480 let n_features_in = x.ncols();
481 let n_output_features = powers.nrows();
482
483 if n_features_in != self.n_features_in_.unwrap_or(0) {
484 return Err(SklearsError::InvalidInput(format!(
485 "Expected {} features, got {}",
486 self.n_features_in_.unwrap_or(0),
487 n_features_in
488 )));
489 }
490
491 let mut output = Array2::zeros((n_samples, n_output_features));
492
493 self.transform_cpu(x, powers, &mut output)?;
496
497 Ok(output)
498 }
499}
500
501impl PolynomialFeatures<Trained> {
502 fn transform_cpu(
504 &self,
505 x: &Array2<Float>,
506 powers: &Array2<usize>,
507 output: &mut Array2<Float>,
508 ) -> Result<()> {
509 let n_samples = x.nrows();
510 let n_output_features = powers.nrows();
511
512 for sample_idx in 0..n_samples {
513 let sample = x.row(sample_idx);
514
515 for feature_idx in 0..n_output_features {
516 let power_row = powers.row(feature_idx);
517 let mut feature_value = 1.0;
518
519 for (input_feature_idx, &power) in power_row.iter().enumerate() {
520 if power > 0 {
521 let base_value = sample[input_feature_idx];
522 feature_value *= base_value.powi(power as i32);
523 }
524 }
525
526 output[[sample_idx, feature_idx]] = feature_value;
527 }
528 }
529
530 Ok(())
531 }
532}
533
534#[allow(non_snake_case)]
535#[cfg(test)]
536mod tests {
537 use super::*;
538 use approx::assert_abs_diff_eq;
539 use scirs2_core::ndarray::array;
540
541 #[test]
542 fn test_polynomial_features_basic() -> Result<()> {
543 let x = array![[1.0, 2.0], [2.0, 3.0]];
544 let poly = PolynomialFeatures::new().degree(2);
545
546 let fitted = poly.fit(&x, &())?;
547 let transformed = fitted.transform(&x)?;
548
549 assert_eq!(transformed.ncols(), 6);
551
552 assert_abs_diff_eq!(transformed[[0, 0]], 1.0, epsilon = 1e-10); assert_abs_diff_eq!(transformed[[0, 1]], 1.0, epsilon = 1e-10); assert_abs_diff_eq!(transformed[[0, 2]], 2.0, epsilon = 1e-10); assert_abs_diff_eq!(transformed[[0, 3]], 1.0, epsilon = 1e-10); assert_abs_diff_eq!(transformed[[0, 4]], 2.0, epsilon = 1e-10); assert_abs_diff_eq!(transformed[[0, 5]], 4.0, epsilon = 1e-10); Ok(())
561 }
562
563 #[test]
564 fn test_polynomial_features_interaction_only() -> Result<()> {
565 let x = array![[1.0, 2.0], [2.0, 3.0]];
566 let poly = PolynomialFeatures::new().degree(2).interaction_only(true);
567
568 let fitted = poly.fit(&x, &())?;
569 let transformed = fitted.transform(&x)?;
570
571 assert_eq!(transformed.ncols(), 2);
573
574 Ok(())
575 }
576
577 #[test]
578 fn test_polynomial_features_no_bias() -> Result<()> {
579 let x = array![[1.0, 2.0], [2.0, 3.0]];
580 let poly = PolynomialFeatures::new().degree(2).include_bias(false);
581
582 let fitted = poly.fit(&x, &())?;
583 let transformed = fitted.transform(&x)?;
584
585 assert_eq!(transformed.ncols(), 5);
587
588 Ok(())
589 }
590
591 #[test]
592 fn test_feature_names() -> Result<()> {
593 let x = array![[1.0, 2.0]];
594 let poly = PolynomialFeatures::new().degree(2);
595
596 let fitted = poly.fit(&x, &())?;
597 let feature_names = fitted.get_feature_names(Some(&["A".to_string(), "B".to_string()]));
598
599 assert_eq!(feature_names.len(), 6);
600 assert_eq!(feature_names[0], "1"); assert_eq!(feature_names[1], "A"); assert_eq!(feature_names[2], "B"); Ok(())
605 }
606
607 #[test]
608 fn test_n_output_features() {
609 assert_eq!(
611 PolynomialFeatures::<()>::get_n_output_features(2, 2, true, false),
612 6
613 );
614
615 assert_eq!(
617 PolynomialFeatures::<()>::get_n_output_features(2, 2, false, false),
618 5
619 );
620
621 assert_eq!(
623 PolynomialFeatures::<()>::get_n_output_features(2, 2, true, true),
624 2
625 );
626 }
627}