1use scirs2_core::ndarray::{Array1, Array2};
4use sklears_core::{
5 error::{Result, SklearsError},
6 traits::{Fit, Trained, Transform, Untrained},
7 types::Float,
8};
9use std::marker::PhantomData;
10
/// One term of a sparse polynomial expansion.
///
/// Represents `coefficient * Π x[feature_indices[i]] ^ powers[i]`; only the
/// features with non-zero power are stored, keeping the term sparse.
#[derive(Debug, Clone)]
pub struct SparseCoefficient {
    /// Indices of the input features participating in this term.
    pub feature_indices: Vec<usize>,
    /// Power applied to each corresponding entry of `feature_indices`
    /// (parallel vector; same length, enforced by `new`).
    pub powers: Vec<usize>,
    /// Multiplicative coefficient of the term.
    pub coefficient: Float,
}
21
22impl SparseCoefficient {
23 pub fn new(feature_indices: Vec<usize>, powers: Vec<usize>, coefficient: Float) -> Self {
25 assert_eq!(feature_indices.len(), powers.len());
26 Self {
27 feature_indices,
28 powers,
29 coefficient,
30 }
31 }
32
33 pub fn evaluate(&self, sample: &Array1<Float>) -> Float {
35 let mut result = self.coefficient;
36 for (&feature_idx, &power) in self.feature_indices.iter().zip(self.powers.iter()) {
37 if power > 0 && feature_idx < sample.len() {
38 result *= sample[feature_idx].powi(power as i32);
39 }
40 }
41 result
42 }
43
44 pub fn total_degree(&self) -> usize {
46 self.powers.iter().sum()
47 }
48
49 pub fn is_interaction(&self) -> bool {
51 self.powers.iter().all(|&p| p <= 1)
52 }
53}
54
55#[derive(Debug, Clone)]
/// Configuration for [`SparsePolynomialFeatures`].
#[derive(Debug, Clone)]
pub struct SparsePolynomialFeaturesConfig {
    /// Maximum total degree of the generated terms.
    pub degree: usize,
    /// When true, only products of distinct features are generated for
    /// degrees above 1 (no feature raised to a power above 1).
    pub interaction_only: bool,
    /// Whether to emit the constant bias term.
    pub include_bias: bool,
    /// Optional cap on the number of generated terms.
    pub max_terms: Option<usize>,
    /// Minimum coefficient magnitude threshold.
    /// NOTE(review): this value is never consulted anywhere in this file —
    /// confirm whether term filtering by coefficient is still intended.
    pub min_coefficient: Float,
    /// Whether to sort terms by (total degree, feature indices) after
    /// generation.
    pub sort_terms: bool,
}
71
72impl Default for SparsePolynomialFeaturesConfig {
73 fn default() -> Self {
74 Self {
75 degree: 2,
76 interaction_only: false,
77 include_bias: true,
78 max_terms: None,
79 min_coefficient: 1e-12,
80 sort_terms: true,
81 }
82 }
83}
84
/// Polynomial feature transformer that stores its expansion as a list of
/// sparse terms instead of a dense exponent matrix.
///
/// The `State` type parameter is a typestate marker: builder methods are
/// available on `Untrained`, accessors and `Transform` on `Trained`.
pub struct SparsePolynomialFeatures<State = Untrained> {
    // Term-generation settings.
    config: SparsePolynomialFeaturesConfig,
    // Zero-sized typestate marker (`Untrained` or `Trained`).
    state: PhantomData<State>,
    // Input feature count recorded by `fit` (None until fitted).
    n_features_in_: Option<usize>,
    // Number of generated output features (None until fitted).
    n_output_features_: Option<usize>,
    // The enumerated sparse terms, in output-column order (None until fitted).
    sparse_terms_: Option<Vec<SparseCoefficient>>,
}
104
105impl SparsePolynomialFeatures<Untrained> {
106 pub fn new() -> Self {
108 Self {
109 config: SparsePolynomialFeaturesConfig::default(),
110 state: PhantomData,
111 n_features_in_: None,
112 n_output_features_: None,
113 sparse_terms_: None,
114 }
115 }
116
117 pub fn degree(mut self, degree: usize) -> Self {
119 self.config.degree = degree;
120 self
121 }
122
123 pub fn interaction_only(mut self, interaction_only: bool) -> Self {
125 self.config.interaction_only = interaction_only;
126 self
127 }
128
129 pub fn include_bias(mut self, include_bias: bool) -> Self {
131 self.config.include_bias = include_bias;
132 self
133 }
134
135 pub fn max_terms(mut self, max_terms: usize) -> Self {
137 self.config.max_terms = Some(max_terms);
138 self
139 }
140
141 pub fn min_coefficient(mut self, min_coefficient: Float) -> Self {
143 self.config.min_coefficient = min_coefficient;
144 self
145 }
146
147 pub fn sort_terms(mut self, sort_terms: bool) -> Self {
149 self.config.sort_terms = sort_terms;
150 self
151 }
152
153 fn generate_sparse_terms(&self, n_features: usize) -> Vec<SparseCoefficient> {
155 let mut terms = Vec::new();
156
157 if self.config.include_bias {
159 terms.push(SparseCoefficient::new(vec![], vec![], 1.0));
160 }
161
162 for feature_idx in 0..n_features {
164 terms.push(SparseCoefficient::new(vec![feature_idx], vec![1], 1.0));
165 }
166
167 for degree in 2..=self.config.degree {
169 self.generate_degree_terms(n_features, degree, &mut terms);
170
171 if let Some(max_terms) = self.config.max_terms {
173 if terms.len() >= max_terms {
174 terms.truncate(max_terms);
175 break;
176 }
177 }
178 }
179
180 if self.config.sort_terms {
182 terms.sort_by(|a, b| {
183 match a.total_degree().cmp(&b.total_degree()) {
185 std::cmp::Ordering::Equal => a.feature_indices.cmp(&b.feature_indices),
186 other => other,
187 }
188 });
189 }
190
191 terms
192 }
193
194 fn generate_degree_terms(
196 &self,
197 n_features: usize,
198 degree: usize,
199 terms: &mut Vec<SparseCoefficient>,
200 ) {
201 if self.config.interaction_only && degree > 1 {
202 self.generate_interaction_combinations(n_features, degree, terms);
204 } else {
205 self.generate_polynomial_combinations(n_features, degree, terms);
207 }
208 }
209
210 fn generate_interaction_combinations(
212 &self,
213 n_features: usize,
214 degree: usize,
215 terms: &mut Vec<SparseCoefficient>,
216 ) {
217 let mut combination = vec![0; degree];
218 self.generate_combinations_recursive(n_features, degree, 0, 0, &mut combination, terms);
219 }
220
221 fn generate_polynomial_combinations(
223 &self,
224 n_features: usize,
225 degree: usize,
226 terms: &mut Vec<SparseCoefficient>,
227 ) {
228 let mut powers = vec![0; n_features];
229 self.generate_polynomial_recursive(n_features, degree, 0, &mut powers, terms);
230 }
231
232 fn generate_combinations_recursive(
234 &self,
235 n_features: usize,
236 degree: usize,
237 start: usize,
238 pos: usize,
239 combination: &mut Vec<usize>,
240 terms: &mut Vec<SparseCoefficient>,
241 ) {
242 if pos == degree {
243 let feature_indices = combination.clone();
245 let powers = vec![1; degree];
246 terms.push(SparseCoefficient::new(feature_indices, powers, 1.0));
247 return;
248 }
249
250 for i in start..n_features {
251 combination[pos] = i;
252 self.generate_combinations_recursive(
253 n_features,
254 degree,
255 i + 1,
256 pos + 1,
257 combination,
258 terms,
259 );
260 }
261 }
262
263 fn generate_polynomial_recursive(
265 &self,
266 n_features: usize,
267 remaining_degree: usize,
268 feature_idx: usize,
269 powers: &mut Vec<usize>,
270 terms: &mut Vec<SparseCoefficient>,
271 ) {
272 if remaining_degree == 0 {
273 let mut feature_indices = Vec::new();
275 let mut term_powers = Vec::new();
276
277 for (idx, &power) in powers.iter().enumerate() {
278 if power > 0 {
279 feature_indices.push(idx);
280 term_powers.push(power);
281 }
282 }
283
284 if !feature_indices.is_empty() {
285 terms.push(SparseCoefficient::new(feature_indices, term_powers, 1.0));
286 }
287 return;
288 }
289
290 if feature_idx >= n_features {
291 return;
292 }
293
294 for power in 0..=remaining_degree {
296 powers[feature_idx] = power;
297 self.generate_polynomial_recursive(
298 n_features,
299 remaining_degree - power,
300 feature_idx + 1,
301 powers,
302 terms,
303 );
304 }
305 powers[feature_idx] = 0; }
307}
308
309impl SparsePolynomialFeatures<Trained> {
310 pub fn n_features_in(&self) -> usize {
312 self.n_features_in_.expect("Not fitted")
313 }
314
315 pub fn n_output_features(&self) -> usize {
317 self.n_output_features_.expect("Not fitted")
318 }
319
320 pub fn sparse_terms(&self) -> &[SparseCoefficient] {
322 self.sparse_terms_.as_ref().expect("Not fitted")
323 }
324
325 pub fn memory_info(&self) -> SparseMemoryInfo {
327 let sparse_terms = self.sparse_terms();
328 let n_terms = sparse_terms.len();
329 let avg_features_per_term = if n_terms > 0 {
330 sparse_terms
331 .iter()
332 .map(|term| term.feature_indices.len())
333 .sum::<usize>() as f64
334 / n_terms as f64
335 } else {
336 0.0
337 };
338
339 let n_features = self.n_features_in();
340 let theoretical_dense_features = if self.config.include_bias {
341 self.binomial_coefficient(n_features + self.config.degree, self.config.degree)
343 } else {
344 self.binomial_coefficient(n_features + self.config.degree, self.config.degree) - 1
345 };
346
347 let sparsity_ratio = n_terms as f64 / theoretical_dense_features as f64;
348
349 SparseMemoryInfo {
350 n_sparse_terms: n_terms,
351 theoretical_dense_features,
352 sparsity_ratio,
353 avg_features_per_term,
354 memory_reduction_factor: theoretical_dense_features as f64 / n_terms as f64,
355 }
356 }
357
358 fn binomial_coefficient(&self, n: usize, k: usize) -> usize {
360 if k > n {
361 return 0;
362 }
363 if k == 0 || k == n {
364 return 1;
365 }
366
367 let k = if k > n - k { n - k } else { k }; let mut result = 1;
370 for i in 0..k {
371 result = result * (n - i) / (i + 1);
372 }
373 result
374 }
375}
376
377impl Default for SparsePolynomialFeatures<Untrained> {
378 fn default() -> Self {
379 Self::new()
380 }
381}
382
383impl Fit<Array2<Float>, ()> for SparsePolynomialFeatures<Untrained> {
384 type Fitted = SparsePolynomialFeatures<Trained>;
385
386 fn fit(self, x: &Array2<Float>, _y: &()) -> Result<Self::Fitted> {
387 let (n_samples, n_features) = x.dim();
388
389 if n_samples == 0 {
390 return Err(SklearsError::InvalidParameter {
391 name: "n_samples".to_string(),
392 reason: "Cannot fit on empty dataset".to_string(),
393 });
394 }
395
396 if self.config.degree == 0 {
397 return Err(SklearsError::InvalidParameter {
398 name: "degree".to_string(),
399 reason: "Degree must be positive".to_string(),
400 });
401 }
402
403 let sparse_terms = self.generate_sparse_terms(n_features);
405 let n_output_features = sparse_terms.len();
406
407 Ok(SparsePolynomialFeatures {
408 config: self.config,
409 state: PhantomData,
410 n_features_in_: Some(n_features),
411 n_output_features_: Some(n_output_features),
412 sparse_terms_: Some(sparse_terms),
413 })
414 }
415}
416
417impl Transform<Array2<Float>, Array2<Float>> for SparsePolynomialFeatures<Trained> {
418 fn transform(&self, x: &Array2<Float>) -> Result<Array2<Float>> {
419 let (n_samples, n_features) = x.dim();
420
421 if n_features != self.n_features_in() {
422 return Err(SklearsError::FeatureMismatch {
423 expected: self.n_features_in(),
424 actual: n_features,
425 });
426 }
427
428 let sparse_terms = self.sparse_terms();
429 let n_output_features = sparse_terms.len();
430
431 let mut output = Array2::zeros((n_samples, n_output_features));
432
433 for (sample_idx, sample) in x.rows().into_iter().enumerate() {
434 let sample_array = sample.to_owned();
435 for (term_idx, term) in sparse_terms.iter().enumerate() {
436 output[[sample_idx, term_idx]] = term.evaluate(&sample_array);
437 }
438 }
439
440 Ok(output)
441 }
442}
443
/// Memory statistics comparing a sparse expansion with the equivalent dense
/// polynomial expansion.
#[derive(Debug, Clone)]
pub struct SparseMemoryInfo {
    /// Number of sparse terms actually stored.
    pub n_sparse_terms: usize,
    /// Column count of the equivalent fully dense expansion.
    pub theoretical_dense_features: usize,
    /// `n_sparse_terms / theoretical_dense_features` (lower = sparser).
    pub sparsity_ratio: f64,
    /// Mean number of participating features per stored term.
    pub avg_features_per_term: f64,
    /// `theoretical_dense_features / n_sparse_terms` (higher = more saved).
    pub memory_reduction_factor: f64,
}
458
#[allow(non_snake_case)]
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::array;

    // A single term evaluates as coefficient * Π feature^power.
    #[test]
    fn test_sparse_coefficient() {
        let coeff = SparseCoefficient::new(vec![0, 1], vec![2, 1], 1.5);

        assert_eq!(coeff.total_degree(), 3);
        // Power 2 present => not a pure interaction term.
        assert!(!coeff.is_interaction());

        let sample = array![2.0, 3.0];
        let result = coeff.evaluate(&sample);
        // 1.5 * 2^2 * 3^1 = 18
        assert_eq!(result, 18.0);
    }

    // Degree-2 fit on 2 features yields at least bias + linear + quadratic
    // terms, and transform preserves the sample count.
    #[test]
    fn test_sparse_polynomial_features_basic() {
        let transformer = SparsePolynomialFeatures::new().degree(2).include_bias(true);

        let x = array![[1.0, 2.0], [3.0, 4.0]];
        let fitted = transformer.fit(&x, &()).unwrap();

        assert_eq!(fitted.n_features_in(), 2);
        assert!(fitted.n_output_features() >= 5);
        let transformed = fitted.transform(&x).unwrap();
        assert_eq!(transformed.nrows(), 2);
        assert_eq!(transformed.ncols(), fitted.n_output_features());
    }

    // interaction_only with 3 features, no bias: 3 linear + C(3,2) = 3
    // pairwise products = 6 output columns.
    #[test]
    fn test_interaction_only() {
        let transformer = SparsePolynomialFeatures::new()
            .degree(2)
            .interaction_only(true)
            .include_bias(false);

        let x = array![[1.0, 2.0, 3.0]];
        let fitted = transformer.fit(&x, &()).unwrap();

        let transformed = fitted.transform(&x).unwrap();

        assert_eq!(transformed.ncols(), 6);
    }

    // max_terms caps the generated term count exactly.
    #[test]
    fn test_max_terms_limit() {
        let transformer = SparsePolynomialFeatures::new().degree(3).max_terms(10);

        let x = array![[1.0, 2.0, 3.0, 4.0, 5.0]];
        let fitted = transformer.fit(&x, &()).unwrap();

        assert_eq!(fitted.n_output_features(), 10);
    }

    // Memory statistics must be consistent with the fitted term count.
    #[test]
    fn test_memory_info() {
        let transformer = SparsePolynomialFeatures::new().degree(2).max_terms(10);

        let x = array![[1.0, 2.0, 3.0]];
        let fitted = transformer.fit(&x, &()).unwrap();

        let memory_info = fitted.memory_info();
        assert_eq!(memory_info.n_sparse_terms, fitted.n_output_features());
        assert!(memory_info.sparsity_ratio <= 1.0);
        assert!(memory_info.memory_reduction_factor >= 1.0);
    }
}