1use scirs2_core::ndarray::{Array1, Array2};
4use std::collections::{HashMap, HashSet};
5
6use super::{FeatureSelectionConfig, FeatureSelectionResult, ScoringMethod};
7use crate::error::{Result, TimeSeriesError};
8
/// Namespace for wrapper-style feature-selection routines (forward selection,
/// backward elimination, recursive feature elimination, bidirectional search)
/// built on a simple ordinary-least-squares scorer.
///
/// Stateless: all functionality is exposed as associated functions.
pub struct WrapperMethods;
11
12impl WrapperMethods {
13 pub fn forward_selection(
41 features: &Array2<f64>,
42 target: &Array1<f64>,
43 config: &FeatureSelectionConfig,
44 ) -> Result<FeatureSelectionResult> {
45 let (n_samples, nfeatures) = features.dim();
46
47 if n_samples != target.len() {
48 return Err(TimeSeriesError::DimensionMismatch {
49 expected: n_samples,
50 actual: target.len(),
51 });
52 }
53
54 let maxfeatures = config.n_features.unwrap_or(nfeatures.min(10));
55 let mut selectedfeatures = Vec::new();
56 let mut remainingfeatures: HashSet<usize> = (0..nfeatures).collect();
57 let mut feature_scores = Array1::zeros(nfeatures);
58 let mut best_score = f64::NEG_INFINITY;
59
60 for _iteration in 0..maxfeatures.min(config.max_iterations) {
61 let mut best_feature = None;
62 let mut best_iteration_score = f64::NEG_INFINITY;
63
64 for &feature_idx in &remainingfeatures {
66 let mut currentfeatures = selectedfeatures.clone();
67 currentfeatures.push(feature_idx);
68
69 let score =
70 Self::evaluate_feature_subset(features, target, ¤tfeatures, config)?;
71
72 if score > best_iteration_score {
73 best_iteration_score = score;
74 best_feature = Some(feature_idx);
75 }
76 }
77
78 if let Some(feature_idx) = best_feature {
79 if best_iteration_score > best_score {
81 selectedfeatures.push(feature_idx);
82 remainingfeatures.remove(&feature_idx);
83 feature_scores[feature_idx] = best_iteration_score;
84 best_score = best_iteration_score;
85 } else {
86 break;
88 }
89 } else {
90 break;
91 }
92 }
93
94 let mut metadata = HashMap::new();
95 metadata.insert("final_score".to_string(), best_score);
96 metadata.insert("iterations".to_string(), selectedfeatures.len() as f64);
97
98 Ok(FeatureSelectionResult {
99 selected_features: selectedfeatures,
100 feature_scores,
101 method: "ForwardSelection".to_string(),
102 metadata,
103 })
104 }
105
106 pub fn backward_elimination(
120 features: &Array2<f64>,
121 target: &Array1<f64>,
122 config: &FeatureSelectionConfig,
123 ) -> Result<FeatureSelectionResult> {
124 let (n_samples, nfeatures) = features.dim();
125
126 if n_samples != target.len() {
127 return Err(TimeSeriesError::DimensionMismatch {
128 expected: n_samples,
129 actual: target.len(),
130 });
131 }
132
133 let minfeatures = config.n_features.unwrap_or(1).max(1);
134 let mut selectedfeatures: Vec<usize> = (0..nfeatures).collect();
135 let mut feature_scores = Array1::zeros(nfeatures);
136 let mut best_score =
137 Self::evaluate_feature_subset(features, target, &selectedfeatures, config)?;
138
139 while selectedfeatures.len() > minfeatures {
140 let mut worst_feature = None;
141 let mut best_iteration_score = f64::NEG_INFINITY;
142
143 for (i, &_feature_idx) in selectedfeatures.iter().enumerate() {
145 let mut currentfeatures = selectedfeatures.clone();
146 currentfeatures.remove(i);
147
148 if currentfeatures.is_empty() {
149 continue;
150 }
151
152 let score =
153 Self::evaluate_feature_subset(features, target, ¤tfeatures, config)?;
154
155 if score > best_iteration_score {
156 best_iteration_score = score;
157 worst_feature = Some(i);
158 }
159 }
160
161 if let Some(worst_idx) = worst_feature {
162 if best_iteration_score >= best_score * 0.99 {
164 let removed_feature = selectedfeatures.remove(worst_idx);
166 feature_scores[removed_feature] = best_score - best_iteration_score;
167 best_score = best_iteration_score;
168 } else {
169 break;
171 }
172 } else {
173 break;
174 }
175 }
176
177 for &idx in &selectedfeatures {
179 feature_scores[idx] = best_score;
180 }
181
182 let mut metadata = HashMap::new();
183 metadata.insert("final_score".to_string(), best_score);
184 metadata.insert(
185 "features_removed".to_string(),
186 (nfeatures - selectedfeatures.len()) as f64,
187 );
188
189 Ok(FeatureSelectionResult {
190 selected_features: selectedfeatures,
191 feature_scores,
192 method: "BackwardElimination".to_string(),
193 metadata,
194 })
195 }
196
197 pub fn recursive_feature_elimination(
211 features: &Array2<f64>,
212 target: &Array1<f64>,
213 config: &FeatureSelectionConfig,
214 ) -> Result<FeatureSelectionResult> {
215 let (n_samples, nfeatures) = features.dim();
216
217 if n_samples != target.len() {
218 return Err(TimeSeriesError::DimensionMismatch {
219 expected: n_samples,
220 actual: target.len(),
221 });
222 }
223
224 let targetfeatures = config.n_features.unwrap_or(nfeatures / 2).max(1);
225 let mut selectedfeatures: Vec<usize> = (0..nfeatures).collect();
226 let mut feature_scores = Array1::ones(nfeatures);
227
228 let mut iteration = 0;
229 while selectedfeatures.len() > targetfeatures && iteration < config.max_iterations {
230 let importance =
232 Self::calculate_feature_importance(features, target, &selectedfeatures)?;
233
234 let n_to_remove = ((selectedfeatures.len() as f64 * 0.1).ceil() as usize).max(1);
236 let n_to_remove = n_to_remove.min(selectedfeatures.len() - targetfeatures);
237
238 if n_to_remove == 0 {
239 break;
240 }
241
242 let mut indexed_importance: Vec<(usize, f64)> = selectedfeatures
244 .iter()
245 .map(|&idx| (idx, importance[idx]))
246 .collect();
247
248 indexed_importance
249 .sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
250
251 for &(feature_idx, importance) in indexed_importance.iter().take(n_to_remove) {
253 feature_scores[feature_idx] = importance;
254 if let Some(pos) = selectedfeatures.iter().position(|&x| x == feature_idx) {
255 selectedfeatures.remove(pos);
256 }
257 }
258
259 iteration += 1;
260 }
261
262 let final_score =
264 Self::evaluate_feature_subset(features, target, &selectedfeatures, config)?;
265 for &idx in &selectedfeatures {
266 feature_scores[idx] = final_score;
267 }
268
269 let mut metadata = HashMap::new();
270 metadata.insert("final_score".to_string(), final_score);
271 metadata.insert("iterations".to_string(), iteration as f64);
272
273 Ok(FeatureSelectionResult {
274 selected_features: selectedfeatures,
275 feature_scores,
276 method: "RecursiveFeatureElimination".to_string(),
277 metadata,
278 })
279 }
280
281 pub fn bidirectional_selection(
295 features: &Array2<f64>,
296 target: &Array1<f64>,
297 config: &FeatureSelectionConfig,
298 ) -> Result<FeatureSelectionResult> {
299 let (n_samples, nfeatures) = features.dim();
300
301 if n_samples != target.len() {
302 return Err(TimeSeriesError::DimensionMismatch {
303 expected: n_samples,
304 actual: target.len(),
305 });
306 }
307
308 let maxfeatures = config.n_features.unwrap_or(nfeatures.min(10));
309 let mut selectedfeatures = Vec::new();
310 let mut remainingfeatures: HashSet<usize> = (0..nfeatures).collect();
311 let mut feature_scores = Array1::zeros(nfeatures);
312 let mut best_score = f64::NEG_INFINITY;
313
314 for _iteration in 0..config.max_iterations {
315 let mut improved = false;
316
317 if selectedfeatures.len() < maxfeatures {
319 let mut best_add_feature = None;
320 let mut best_add_score = best_score;
321
322 for &feature_idx in &remainingfeatures {
323 let mut currentfeatures = selectedfeatures.clone();
324 currentfeatures.push(feature_idx);
325
326 let score =
327 Self::evaluate_feature_subset(features, target, ¤tfeatures, config)?;
328
329 if score > best_add_score {
330 best_add_score = score;
331 best_add_feature = Some(feature_idx);
332 }
333 }
334
335 if let Some(feature_idx) = best_add_feature {
336 selectedfeatures.push(feature_idx);
337 remainingfeatures.remove(&feature_idx);
338 feature_scores[feature_idx] = best_add_score;
339 best_score = best_add_score;
340 improved = true;
341 }
342 }
343
344 if selectedfeatures.len() > 1 {
346 let mut best_remove_idx = None;
347 let mut best_remove_score = best_score;
348
349 for (i, &_feature_idx) in selectedfeatures.iter().enumerate() {
350 let mut currentfeatures = selectedfeatures.clone();
351 currentfeatures.remove(i);
352
353 let score =
354 Self::evaluate_feature_subset(features, target, ¤tfeatures, config)?;
355
356 if score > best_remove_score {
357 best_remove_score = score;
358 best_remove_idx = Some(i);
359 }
360 }
361
362 if let Some(remove_idx) = best_remove_idx {
363 let removed_feature = selectedfeatures.remove(remove_idx);
364 remainingfeatures.insert(removed_feature);
365 feature_scores[removed_feature] = best_score - best_remove_score;
366 best_score = best_remove_score;
367 improved = true;
368 }
369 }
370
371 if !improved {
372 break;
373 }
374 }
375
376 let mut metadata = HashMap::new();
377 metadata.insert("final_score".to_string(), best_score);
378 metadata.insert("n_selected".to_string(), selectedfeatures.len() as f64);
379
380 Ok(FeatureSelectionResult {
381 selected_features: selectedfeatures,
382 feature_scores,
383 method: "BidirectionalSelection".to_string(),
384 metadata,
385 })
386 }
387
388 pub fn evaluate_feature_subset(
392 features: &Array2<f64>,
393 target: &Array1<f64>,
394 feature_indices: &[usize],
395 config: &FeatureSelectionConfig,
396 ) -> Result<f64> {
397 if feature_indices.is_empty() {
398 return Ok(f64::NEG_INFINITY);
399 }
400
401 let n_samples = features.nrows();
402
403 let selectedfeatures =
405 Array2::from_shape_fn((n_samples, feature_indices.len()), |(i, j)| {
406 features[[i, feature_indices[j]]]
407 });
408
409 match config.scoring_method {
410 ScoringMethod::MeanSquaredError => Self::calculate_mse_score(&selectedfeatures, target),
411 ScoringMethod::MeanAbsoluteError => {
412 Self::calculate_mae_score(&selectedfeatures, target)
413 }
414 ScoringMethod::RSquared => Self::calculate_r2_score(&selectedfeatures, target),
415 ScoringMethod::AIC => Self::calculate_aic_score(&selectedfeatures, target),
416 ScoringMethod::BIC => Self::calculate_bic_score(&selectedfeatures, target),
417 ScoringMethod::CrossValidation => {
418 Self::calculate_cv_score(&selectedfeatures, target, config.cv_folds)
419 }
420 }
421 }
422
423 fn calculate_mse_score(features: &Array2<f64>, target: &Array1<f64>) -> Result<f64> {
424 let predictions = Self::fit_predict_linear(features, target)?;
425 let mse = target
426 .iter()
427 .zip(predictions.iter())
428 .map(|(y_true, y_pred)| (y_true - y_pred).powi(2))
429 .sum::<f64>()
430 / target.len() as f64;
431
432 Ok(-mse) }
434
435 fn calculate_mae_score(features: &Array2<f64>, target: &Array1<f64>) -> Result<f64> {
436 let predictions = Self::fit_predict_linear(features, target)?;
437 let mae = target
438 .iter()
439 .zip(predictions.iter())
440 .map(|(y_true, y_pred)| (y_true - y_pred).abs())
441 .sum::<f64>()
442 / target.len() as f64;
443
444 Ok(-mae) }
446
447 fn calculate_r2_score(features: &Array2<f64>, target: &Array1<f64>) -> Result<f64> {
448 let predictions = Self::fit_predict_linear(features, target)?;
449 let y_mean = target.sum() / target.len() as f64;
450
451 let ss_res = target
452 .iter()
453 .zip(predictions.iter())
454 .map(|(y_true, y_pred)| (y_true - y_pred).powi(2))
455 .sum::<f64>();
456
457 let ss_tot = target.iter().map(|y| (y - y_mean).powi(2)).sum::<f64>();
458
459 if ss_tot == 0.0 {
460 Ok(0.0)
461 } else {
462 Ok(1.0 - ss_res / ss_tot)
463 }
464 }
465
466 fn calculate_aic_score(features: &Array2<f64>, target: &Array1<f64>) -> Result<f64> {
467 let predictions = Self::fit_predict_linear(features, target)?;
468 let n = target.len() as f64;
469 let k = features.ncols() as f64 + 1.0; let mse = target
472 .iter()
473 .zip(predictions.iter())
474 .map(|(y_true, y_pred)| (y_true - y_pred).powi(2))
475 .sum::<f64>()
476 / n;
477
478 let aic = n * mse.ln() + 2.0 * k;
479 Ok(-aic) }
481
482 fn calculate_bic_score(features: &Array2<f64>, target: &Array1<f64>) -> Result<f64> {
483 let predictions = Self::fit_predict_linear(features, target)?;
484 let n = target.len() as f64;
485 let k = features.ncols() as f64 + 1.0; let mse = target
488 .iter()
489 .zip(predictions.iter())
490 .map(|(y_true, y_pred)| (y_true - y_pred).powi(2))
491 .sum::<f64>()
492 / n;
493
494 let bic = n * mse.ln() + k * n.ln();
495 Ok(-bic) }
497
498 fn calculate_cv_score(
499 features: &Array2<f64>,
500 target: &Array1<f64>,
501 cv_folds: usize,
502 ) -> Result<f64> {
503 let n_samples = features.nrows();
504 let fold_size = n_samples / cv_folds;
505 let mut scores = Vec::new();
506
507 for fold in 0..cv_folds {
508 let test_start = fold * fold_size;
509 let test_end = if fold == cv_folds - 1 {
510 n_samples
511 } else {
512 (fold + 1) * fold_size
513 };
514
515 let train_indices: Vec<usize> = (0..test_start).chain(test_end..n_samples).collect();
517 let test_indices: Vec<usize> = (test_start..test_end).collect();
518
519 if train_indices.is_empty() || test_indices.is_empty() {
520 continue;
521 }
522
523 let trainfeatures =
525 Array2::from_shape_fn((train_indices.len(), features.ncols()), |(i, j)| {
526 features[[train_indices[i], j]]
527 });
528 let traintarget =
529 Array1::from_shape_fn(train_indices.len(), |i| target[train_indices[i]]);
530 let testfeatures =
531 Array2::from_shape_fn((test_indices.len(), features.ncols()), |(i, j)| {
532 features[[test_indices[i], j]]
533 });
534 let testtarget = Array1::from_shape_fn(test_indices.len(), |i| target[test_indices[i]]);
535
536 let coefficients = Self::fit_linear_regression(&trainfeatures, &traintarget)?;
538 let predictions = Self::predict_linear(&testfeatures, &coefficients);
539
540 let score = Self::calculate_r2_from_predictions(&testtarget, &predictions);
542 scores.push(score);
543 }
544
545 if scores.is_empty() {
546 Ok(0.0)
547 } else {
548 Ok(scores.iter().sum::<f64>() / scores.len() as f64)
549 }
550 }
551
552 pub fn fit_predict_linear(features: &Array2<f64>, target: &Array1<f64>) -> Result<Array1<f64>> {
554 let coefficients = Self::fit_linear_regression(features, target)?;
555 Ok(Self::predict_linear(features, &coefficients))
556 }
557
558 pub fn fit_linear_regression(
560 features: &Array2<f64>,
561 target: &Array1<f64>,
562 ) -> Result<Array1<f64>> {
563 let n_samples = features.nrows();
564 let nfeatures = features.ncols();
565
566 let mut x_matrix = Array2::ones((n_samples, nfeatures + 1));
568 for i in 0..n_samples {
569 for j in 0..nfeatures {
570 x_matrix[[i, j + 1]] = features[[i, j]];
571 }
572 }
573
574 let xt = x_matrix.t();
576 let xtx = xt.dot(&x_matrix);
577 let xty = xt.dot(target);
578
579 let coefficients = Self::solve_linear_system(&xtx, &xty)?;
581
582 Ok(coefficients)
583 }
584
585 fn predict_linear(features: &Array2<f64>, coefficients: &Array1<f64>) -> Array1<f64> {
586 let n_samples = features.nrows();
587 let nfeatures = features.ncols();
588
589 let mut predictions = Array1::zeros(n_samples);
590
591 for i in 0..n_samples {
592 let mut pred = coefficients[0]; for j in 0..nfeatures {
594 pred += coefficients[j + 1] * features[[i, j]];
595 }
596 predictions[i] = pred;
597 }
598
599 predictions
600 }
601
    /// Solve the square linear system `a · x = b` by Gaussian elimination
    /// with partial pivoting and back substitution.
    ///
    /// Near-zero pivots (|pivot| < 1e-10) are nudged by 1e-6 instead of being
    /// reported as an error, so ill-conditioned systems yield a regularized
    /// approximate solution rather than failing.
    ///
    /// # Errors
    /// Returns `TimeSeriesError::DimensionMismatch` when `a` is not square or
    /// `b.len()` does not match `a`'s dimension.
    pub fn solve_linear_system(a: &Array2<f64>, b: &Array1<f64>) -> Result<Array1<f64>> {
        let n = a.nrows();
        if n != a.ncols() || n != b.len() {
            return Err(TimeSeriesError::DimensionMismatch {
                expected: n,
                actual: b.len(),
            });
        }

        // Build the augmented matrix [a | b]; column n holds b.
        let mut aug_matrix = Array2::zeros((n, n + 1));

        for i in 0..n {
            for j in 0..n {
                aug_matrix[[i, j]] = a[[i, j]];
            }
            aug_matrix[[i, n]] = b[i];
        }

        // Forward elimination.
        for i in 0..n {
            // Partial pivoting: pick the row with the largest |entry| in column i.
            let mut max_row = i;
            for k in (i + 1)..n {
                if aug_matrix[[k, i]].abs() > aug_matrix[[max_row, i]].abs() {
                    max_row = k;
                }
            }

            // Swap the pivot row into position (element-wise, incl. column n).
            if max_row != i {
                for j in 0..=n {
                    let temp = aug_matrix[[i, j]];
                    aug_matrix[[i, j]] = aug_matrix[[max_row, j]];
                    aug_matrix[[max_row, j]] = temp;
                }
            }

            // Regularize a (near-)singular pivot instead of erroring out.
            if aug_matrix[[i, i]].abs() < 1e-10 {
                aug_matrix[[i, i]] += 1e-6;
            }

            // Eliminate column i from every row below the pivot.
            for k in (i + 1)..n {
                let factor = aug_matrix[[k, i]] / aug_matrix[[i, i]];
                for j in i..=n {
                    aug_matrix[[k, j]] -= factor * aug_matrix[[i, j]];
                }
            }
        }

        // Back substitution on the upper-triangular system.
        let mut x = Array1::zeros(n);

        for i in (0..n).rev() {
            x[i] = aug_matrix[[i, n]];
            for j in (i + 1)..n {
                x[i] -= aug_matrix[[i, j]] * x[j];
            }
            x[i] /= aug_matrix[[i, i]];
        }

        Ok(x)
    }
669
670 fn calculate_r2_from_predictions(target: &Array1<f64>, predictions: &Array1<f64>) -> f64 {
671 let y_mean = target.sum() / target.len() as f64;
672
673 let ss_res = target
674 .iter()
675 .zip(predictions.iter())
676 .map(|(y_true, y_pred)| (y_true - y_pred).powi(2))
677 .sum::<f64>();
678
679 let ss_tot = target.iter().map(|y| (y - y_mean).powi(2)).sum::<f64>();
680
681 if ss_tot == 0.0 {
682 0.0
683 } else {
684 1.0 - ss_res / ss_tot
685 }
686 }
687
688 fn calculate_feature_importance(
689 features: &Array2<f64>,
690 target: &Array1<f64>,
691 feature_indices: &[usize],
692 ) -> Result<Array1<f64>> {
693 let nfeatures = features.ncols();
694 let mut importance = Array1::zeros(nfeatures);
695
696 let selectedfeatures =
698 Array2::from_shape_fn((features.nrows(), feature_indices.len()), |(i, j)| {
699 features[[i, feature_indices[j]]]
700 });
701
702 let coefficients = Self::fit_linear_regression(&selectedfeatures, target)?;
704
705 for (i, &feature_idx) in feature_indices.iter().enumerate() {
707 importance[feature_idx] = coefficients[i + 1].abs();
708 }
709
710 Ok(importance)
711 }
712}