1use crate::{UtilsError, UtilsResult};
7use scirs2_core::ndarray::{Array1, Array2};
8
9#[derive(Clone, Debug)]
14pub struct PolynomialFeatures {
15 degree: usize,
16 include_bias: bool,
17 interaction_only: bool,
18}
19
20impl PolynomialFeatures {
21 pub fn new(degree: usize, include_bias: bool, interaction_only: bool) -> UtilsResult<Self> {
28 if degree == 0 {
29 return Err(UtilsError::InvalidParameter(
30 "Degree must be at least 1".to_string(),
31 ));
32 }
33
34 Ok(Self {
35 degree,
36 include_bias,
37 interaction_only,
38 })
39 }
40
41 pub fn n_output_features(&self, n_features: usize) -> usize {
49 let mut n_out = if self.include_bias { 1 } else { 0 };
50
51 if self.interaction_only {
52 for d in 1..=self.degree {
54 n_out += Self::binomial_coefficient(n_features, d);
55 }
56 } else {
57 n_out += Self::binomial_coefficient(n_features + self.degree, self.degree) - 1;
59 }
60
61 n_out
62 }
63
64 pub fn transform(&self, x: &Array2<f64>) -> UtilsResult<Array2<f64>> {
72 if x.nrows() == 0 || x.ncols() == 0 {
73 return Err(UtilsError::EmptyInput);
74 }
75
76 let n_samples = x.nrows();
77 let n_features = x.ncols();
78 let n_output = self.n_output_features(n_features);
79
80 let mut result = Array2::zeros((n_samples, n_output));
81
82 for i in 0..n_samples {
83 let row = x.row(i);
84 let features = self.generate_polynomial_row(&row.to_vec());
85 for (j, &val) in features.iter().enumerate() {
86 result[[i, j]] = val;
87 }
88 }
89
90 Ok(result)
91 }
92
93 fn generate_polynomial_row(&self, row: &[f64]) -> Vec<f64> {
94 let mut features = Vec::new();
95
96 if self.include_bias {
97 features.push(1.0);
98 }
99
100 if self.interaction_only {
101 self.generate_interaction_features(row, &mut features);
102 } else {
103 self.generate_all_polynomial_features(row, &mut features);
104 }
105
106 features
107 }
108
109 fn generate_interaction_features(&self, row: &[f64], features: &mut Vec<f64>) {
110 for degree in 1..=self.degree {
112 Self::generate_combinations(row, degree, 0, vec![], features);
113 }
114 }
115
116 fn generate_combinations(
117 row: &[f64],
118 k: usize,
119 start: usize,
120 current: Vec<usize>,
121 features: &mut Vec<f64>,
122 ) {
123 if current.len() == k {
124 let product: f64 = current.iter().map(|&i| row[i]).product();
125 features.push(product);
126 return;
127 }
128
129 for i in start..row.len() {
130 let mut next = current.clone();
131 next.push(i);
132 Self::generate_combinations(row, k, i + 1, next, features);
133 }
134 }
135
136 fn generate_all_polynomial_features(&self, row: &[f64], features: &mut Vec<f64>) {
137 let n = row.len();
138 Self::generate_powers_recursive(row, self.degree, 0, vec![0; n], 1.0, features);
139 }
140
141 fn generate_powers_recursive(
142 row: &[f64],
143 max_degree: usize,
144 feature_idx: usize,
145 powers: Vec<usize>,
146 current_product: f64,
147 features: &mut Vec<f64>,
148 ) {
149 if feature_idx == row.len() {
150 if powers.iter().sum::<usize>() > 0 {
151 features.push(current_product);
152 }
153 return;
154 }
155
156 let current_degree: usize = powers.iter().sum();
157
158 for power in 0..=(max_degree - current_degree) {
159 let mut new_powers = powers.clone();
160 new_powers[feature_idx] = power;
161
162 let new_product = if power > 0 {
163 current_product * row[feature_idx].powi(power as i32)
164 } else {
165 current_product
166 };
167
168 Self::generate_powers_recursive(
169 row,
170 max_degree,
171 feature_idx + 1,
172 new_powers,
173 new_product,
174 features,
175 );
176 }
177 }
178
179 fn binomial_coefficient(n: usize, k: usize) -> usize {
180 if k > n {
181 return 0;
182 }
183 if k == 0 || k == n {
184 return 1;
185 }
186
187 let k = k.min(n - k); let mut result = 1;
189
190 for i in 0..k {
191 result *= n - i;
192 result /= i + 1;
193 }
194
195 result
196 }
197}
198
199#[derive(Clone, Debug)]
203pub struct InteractionFeatures {
204 include_self: bool,
205}
206
207impl InteractionFeatures {
208 pub fn new(include_self: bool) -> Self {
213 Self { include_self }
214 }
215
216 pub fn transform(&self, x: &Array2<f64>) -> UtilsResult<Array2<f64>> {
224 if x.nrows() == 0 || x.ncols() == 0 {
225 return Err(UtilsError::EmptyInput);
226 }
227
228 let n_samples = x.nrows();
229 let n_features = x.ncols();
230
231 let n_interactions = if self.include_self {
232 n_features * (n_features + 1) / 2
233 } else {
234 n_features * (n_features - 1) / 2
235 };
236
237 let mut result = Array2::zeros((n_samples, n_features + n_interactions));
238
239 for i in 0..n_samples {
241 for j in 0..n_features {
242 result[[i, j]] = x[[i, j]];
243 }
244 }
245
246 let mut col_idx = n_features;
248 for i in 0..n_features {
249 let start_j = if self.include_self { i } else { i + 1 };
250 for j in start_j..n_features {
251 for row in 0..n_samples {
252 result[[row, col_idx]] = x[[row, i]] * x[[row, j]];
253 }
254 col_idx += 1;
255 }
256 }
257
258 Ok(result)
259 }
260
261 pub fn n_output_features(&self, n_input_features: usize) -> usize {
263 let n_interactions = if self.include_self {
264 n_input_features * (n_input_features + 1) / 2
265 } else {
266 n_input_features * (n_input_features - 1) / 2
267 };
268 n_input_features + n_interactions
269 }
270}
271
272impl Default for InteractionFeatures {
273 fn default() -> Self {
274 Self::new(false)
275 }
276}
277
278#[derive(Clone, Debug)]
282pub struct FeatureBinner {
283 n_bins: usize,
284 strategy: BinningStrategy,
285}
286
287#[derive(Clone, Debug, PartialEq)]
289pub enum BinningStrategy {
290 Uniform,
292 Quantile,
294 KMeans,
296}
297
298impl FeatureBinner {
299 pub fn new(n_bins: usize, strategy: BinningStrategy) -> UtilsResult<Self> {
305 if n_bins < 2 {
306 return Err(UtilsError::InvalidParameter(
307 "n_bins must be at least 2".to_string(),
308 ));
309 }
310
311 Ok(Self { n_bins, strategy })
312 }
313
314 pub fn fit_transform(&self, x: &Array1<f64>) -> UtilsResult<Array1<usize>> {
322 if x.is_empty() {
323 return Err(UtilsError::EmptyInput);
324 }
325
326 match self.strategy {
327 BinningStrategy::Uniform => self.uniform_binning(x),
328 BinningStrategy::Quantile => self.quantile_binning(x),
329 BinningStrategy::KMeans => {
330 self.quantile_binning(x)
332 }
333 }
334 }
335
336 fn uniform_binning(&self, x: &Array1<f64>) -> UtilsResult<Array1<usize>> {
337 let min_val = x.iter().copied().fold(f64::INFINITY, f64::min);
338 let max_val = x.iter().copied().fold(f64::NEG_INFINITY, f64::max);
339
340 if (max_val - min_val).abs() < 1e-10 {
341 return Ok(Array1::zeros(x.len()));
343 }
344
345 let bin_width = (max_val - min_val) / (self.n_bins as f64);
346 let mut result = Array1::zeros(x.len());
347
348 for (i, &val) in x.iter().enumerate() {
349 let bin = ((val - min_val) / bin_width).floor() as usize;
350 result[i] = bin.min(self.n_bins - 1); }
352
353 Ok(result)
354 }
355
356 fn quantile_binning(&self, x: &Array1<f64>) -> UtilsResult<Array1<usize>> {
357 let mut sorted: Vec<f64> = x.to_vec();
358 sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());
359
360 let mut edges = Vec::with_capacity(self.n_bins + 1);
362 edges.push(sorted[0] - 1e-10); for i in 1..self.n_bins {
365 let quantile = i as f64 / self.n_bins as f64;
366 let idx = (quantile * sorted.len() as f64) as usize;
367 let idx = idx.min(sorted.len() - 1);
368 edges.push(sorted[idx]);
369 }
370
371 edges.push(sorted[sorted.len() - 1] + 1e-10); let mut result = Array1::zeros(x.len());
375 for (i, &val) in x.iter().enumerate() {
376 for bin in 0..self.n_bins {
377 if val > edges[bin] && val <= edges[bin + 1] {
378 result[i] = bin;
379 break;
380 }
381 }
382 }
383
384 Ok(result)
385 }
386}
387
388#[cfg(test)]
389mod tests {
390 use super::*;
391 use approx::assert_abs_diff_eq;
392 use scirs2_core::ndarray::array;
393
394 #[test]
395 fn test_polynomial_features_degree_2() {
396 let poly = PolynomialFeatures::new(2, true, false).unwrap();
397
398 let x = array![[2.0, 3.0]];
401 let result = poly.transform(&x).unwrap();
402
403 assert_eq!(result.ncols(), 6);
404
405 let values: Vec<f64> = (0..result.ncols()).map(|i| result[[0, i]]).collect();
407
408 assert!(values.contains(&1.0));
410 assert!(values.contains(&2.0));
412 assert!(values.contains(&3.0));
413 assert!(values.contains(&4.0)); assert!(values.contains(&6.0)); assert!(values.contains(&9.0)); }
418
419 #[test]
420 fn test_polynomial_features_interaction_only() {
421 let poly = PolynomialFeatures::new(2, false, true).unwrap();
422
423 let x = array![[2.0, 3.0]];
426 let result = poly.transform(&x).unwrap();
427
428 assert_eq!(result.ncols(), 3);
429 assert_abs_diff_eq!(result[[0, 0]], 2.0, epsilon = 1e-10); assert_abs_diff_eq!(result[[0, 1]], 3.0, epsilon = 1e-10); assert_abs_diff_eq!(result[[0, 2]], 6.0, epsilon = 1e-10); }
433
434 #[test]
435 fn test_polynomial_n_output_features() {
436 let poly = PolynomialFeatures::new(2, true, false).unwrap();
437 assert_eq!(poly.n_output_features(2), 6); let poly = PolynomialFeatures::new(3, false, false).unwrap();
440 assert_eq!(poly.n_output_features(2), 9); }
442
443 #[test]
444 fn test_interaction_features() {
445 let interaction = InteractionFeatures::new(false);
446
447 let x = array![[1.0, 2.0, 3.0], [2.0, 3.0, 4.0]];
450
451 let result = interaction.transform(&x).unwrap();
452
453 assert_eq!(result.ncols(), 6); assert_abs_diff_eq!(result[[0, 0]], 1.0, epsilon = 1e-10); assert_abs_diff_eq!(result[[0, 1]], 2.0, epsilon = 1e-10); assert_abs_diff_eq!(result[[0, 2]], 3.0, epsilon = 1e-10); assert_abs_diff_eq!(result[[0, 3]], 2.0, epsilon = 1e-10); assert_abs_diff_eq!(result[[0, 4]], 3.0, epsilon = 1e-10); assert_abs_diff_eq!(result[[0, 5]], 6.0, epsilon = 1e-10); }
463
464 #[test]
465 fn test_interaction_features_with_self() {
466 let interaction = InteractionFeatures::new(true);
467
468 let x = array![[2.0, 3.0]];
469 let result = interaction.transform(&x).unwrap();
470
471 assert_eq!(result.ncols(), 5); assert_abs_diff_eq!(result[[0, 2]], 4.0, epsilon = 1e-10); assert_abs_diff_eq!(result[[0, 3]], 6.0, epsilon = 1e-10); assert_abs_diff_eq!(result[[0, 4]], 9.0, epsilon = 1e-10); }
477
478 #[test]
479 fn test_uniform_binning() {
480 let binner = FeatureBinner::new(3, BinningStrategy::Uniform).unwrap();
481
482 let x = array![0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0];
483 let bins = binner.fit_transform(&x).unwrap();
484
485 assert_eq!(bins[0], 0); assert_eq!(bins[5], 1); assert_eq!(bins[9], 2); }
490
491 #[test]
492 fn test_quantile_binning() {
493 let binner = FeatureBinner::new(4, BinningStrategy::Quantile).unwrap();
494
495 let x = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
496 let bins = binner.fit_transform(&x).unwrap();
497
498 let mut bin_counts = vec![0; 4];
500 for &bin in bins.iter() {
501 bin_counts[bin] += 1;
502 }
503
504 for &count in &bin_counts {
506 assert!(count > 0);
507 }
508 }
509
510 #[test]
511 fn test_binning_constant_values() {
512 let binner = FeatureBinner::new(3, BinningStrategy::Uniform).unwrap();
513
514 let x = array![5.0, 5.0, 5.0, 5.0];
515 let bins = binner.fit_transform(&x).unwrap();
516
517 assert!(bins.iter().all(|&b| b == 0));
519 }
520}