1use scirs2_core::ndarray::{Array1, Array2, ArrayView1, Axis};
8use scirs2_core::random::essentials::Normal as RandNormal;
9use scirs2_core::random::rngs::StdRng as RealStdRng;
10use scirs2_core::random::seq::SliceRandom;
11use scirs2_core::random::RngExt;
12use scirs2_core::random::{thread_rng, SeedableRng};
13use sklears_core::error::Result;
14use std::collections::HashMap;
15
/// Robust location/scatter estimators used to flag outlying samples.
///
/// Only `Huber`, `Tukey`, `MVE` and `MCD` have dedicated detection routines
/// in this module; the remaining variants fall back to a Mahalanobis-style
/// check (see `RobustRBFSampler::detect_outliers`).
#[derive(Clone, Debug, PartialEq)]
pub enum RobustEstimator {
    /// Huber M-estimator; `delta` is the clipping threshold.
    Huber { delta: f64 },
    /// Tukey biweight; `c` is the rejection constant.
    Tukey { c: f64 },
    /// Hampel three-part redescending estimator with knots `a`, `b`, `c`.
    Hampel { a: f64, b: f64, c: f64 },
    /// Minimum Volume Ellipsoid; `alpha` is the fraction of samples retained.
    MVE { alpha: f64 },
    /// Minimum Covariance Determinant; `alpha` is the fraction retained.
    MCD { alpha: f64 },
    /// S-estimator targeting the given breakdown point (no dedicated path yet).
    SEstimator { breakdown_point: f64 },
    /// MM-estimator targeting the given efficiency (no dedicated path yet).
    MMEstimator { efficiency: f64 },
    /// Least absolute deviations (no dedicated path yet).
    L1,
    /// Quantile estimator at level `tau` (no dedicated path yet).
    Quantile { tau: f64 },
}
39
/// Robust loss functions mapped to IRLS sample weights.
///
/// Only `Huber`, `Tukey`, `Cauchy`, `Welsch` and `Fair` are handled in
/// `compute_robust_weight`; the other variants currently yield weight 1.0.
#[derive(Clone, Debug, PartialEq)]
pub enum RobustLoss {
    /// Huber loss; quadratic below `delta`, linear above.
    Huber { delta: f64 },
    /// Epsilon-insensitive loss (currently unweighted).
    EpsilonInsensitive { epsilon: f64 },
    /// Tukey biweight loss with rejection constant `c`.
    Tukey { c: f64 },
    /// Cauchy (Lorentzian) loss with scale `sigma`.
    Cauchy { sigma: f64 },
    /// Welsch (Gaussian-decay) loss with scale `sigma`.
    Welsch { sigma: f64 },
    /// Fair loss with scale `c`.
    Fair { c: f64 },
    /// Logistic loss (currently unweighted).
    Logistic { alpha: f64 },
    /// Quantile (pinball) loss at level `tau` (currently unweighted).
    Quantile { tau: f64 },
}
61
/// Configuration shared by the robust kernel approximators.
#[derive(Clone, Debug)]
pub struct RobustKernelConfig {
    /// Estimator used for outlier detection.
    pub estimator: RobustEstimator,
    /// Loss used to turn residuals into per-sample weights.
    pub loss: RobustLoss,
    /// Upper bound on IRLS iterations in `compute_robust_weights`.
    pub max_iterations: usize,
    /// IRLS stops once the total (L1) weight change falls below this.
    pub tolerance: f64,
    /// Expected contamination fraction.
    /// NOTE(review): not consulted by the visible code paths — confirm intent.
    pub contamination: f64,
    /// Enables the iteratively re-weighted loop when computing weights.
    pub use_irls: bool,
    /// Seed for reproducible random projections/sampling; `None` = entropy.
    pub random_state: Option<u64>,
}
81
82impl Default for RobustKernelConfig {
83 fn default() -> Self {
84 Self {
85 estimator: RobustEstimator::Huber { delta: 1.345 },
86 loss: RobustLoss::Huber { delta: 1.345 },
87 max_iterations: 100,
88 tolerance: 1e-6,
89 contamination: 0.1,
90 use_irls: true,
91 random_state: None,
92 }
93 }
94}
95
/// Random-Fourier-feature approximation of the RBF kernel with
/// outlier-aware per-sample weighting.
pub struct RobustRBFSampler {
    // Number of random Fourier features to generate.
    n_components: usize,
    // RBF bandwidth parameter.
    gamma: f64,
    // Robustness configuration (estimator, loss, IRLS settings, seed).
    config: RobustKernelConfig,
    // (n_features, n_components) projection matrix; populated by `fit`.
    random_weights: Option<Array2<f64>>,
    // Per-component phase offsets in [0, 2π); populated by `fit`.
    random_offset: Option<Array1<f64>>,
    // Per-training-sample weights from the robust loss; populated by `fit`.
    robust_weights: Option<Array1<f64>>,
    // `true` marks training samples flagged as outliers; populated by `fit`.
    outlier_mask: Option<Array1<bool>>,
}
106
107impl RobustRBFSampler {
108 pub fn new(n_components: usize) -> Self {
110 Self {
111 n_components,
112 gamma: 1.0,
113 config: RobustKernelConfig::default(),
114 random_weights: None,
115 random_offset: None,
116 robust_weights: None,
117 outlier_mask: None,
118 }
119 }
120
121 pub fn gamma(mut self, gamma: f64) -> Self {
123 self.gamma = gamma;
124 self
125 }
126
127 pub fn with_config(mut self, config: RobustKernelConfig) -> Self {
129 self.config = config;
130 self
131 }
132
133 pub fn fit(&mut self, x: &Array2<f64>) -> Result<()> {
135 let n_features = x.ncols();
136
137 let normal =
139 RandNormal::new(0.0, (2.0 * self.gamma).sqrt()).expect("operation should succeed");
140 let mut rng = if let Some(seed) = self.config.random_state {
141 RealStdRng::seed_from_u64(seed)
142 } else {
143 RealStdRng::from_seed(thread_rng().random())
144 };
145
146 self.random_weights = Some(Array2::from_shape_fn(
147 (n_features, self.n_components),
148 |_| rng.sample(normal),
149 ));
150
151 self.random_offset = Some(Array1::from_shape_fn(self.n_components, |_| {
152 rng.random_range(0.0..2.0 * std::f64::consts::PI)
153 }));
154
155 self.detect_outliers(x)?;
157 self.compute_robust_weights(x)?;
158
159 Ok(())
160 }
161
162 pub fn transform(&self, x: &Array2<f64>) -> Result<Array2<f64>> {
164 let weights = self.random_weights.as_ref().ok_or("Model not fitted")?;
165 let offset = self.random_offset.as_ref().ok_or("Model not fitted")?;
166
167 let projection = x.dot(weights) + offset;
169
170 let mut transformed =
172 projection.mapv(|x| (x * (2.0 / self.n_components as f64).sqrt()).cos());
173
174 if let Some(robust_weights) = &self.robust_weights {
176 for (mut row, &weight) in transformed
177 .rows_mut()
178 .into_iter()
179 .zip(robust_weights.iter())
180 {
181 row *= weight;
182 }
183 }
184
185 Ok(transformed)
186 }
187
188 fn detect_outliers(&mut self, x: &Array2<f64>) -> Result<()> {
190 match &self.config.estimator {
191 RobustEstimator::MVE { alpha } => {
192 self.outlier_mask = Some(self.detect_outliers_mve(x, *alpha)?);
193 }
194 RobustEstimator::MCD { alpha } => {
195 self.outlier_mask = Some(self.detect_outliers_mcd(x, *alpha)?);
196 }
197 RobustEstimator::Huber { delta } => {
198 self.outlier_mask = Some(self.detect_outliers_huber(x, *delta)?);
199 }
200 RobustEstimator::Tukey { c } => {
201 self.outlier_mask = Some(self.detect_outliers_tukey(x, *c)?);
202 }
203 _ => {
204 self.outlier_mask = Some(self.detect_outliers_mahalanobis(x)?);
206 }
207 }
208
209 Ok(())
210 }
211
212 fn detect_outliers_mve(&self, x: &Array2<f64>, alpha: f64) -> Result<Array1<bool>> {
214 let n_samples = x.nrows();
215 let n_features = x.ncols();
216 let h = ((n_samples as f64 * alpha).floor() as usize).max(n_features + 1);
217
218 let mut best_volume = f64::INFINITY;
219 let mut best_subset = Vec::new();
220 let mut rng = thread_rng();
221
222 for _ in 0..100 {
224 let mut subset: Vec<usize> = (0..n_samples).collect();
225 subset.shuffle(&mut rng);
226 subset.truncate(h);
227
228 let subset_data = self.extract_subset(x, &subset);
230 let (_mean, cov) = self.compute_robust_statistics(&subset_data);
231
232 let volume = self.compute_determinant(&cov);
234
235 if volume < best_volume {
236 best_volume = volume;
237 best_subset = subset;
238 }
239 }
240
241 let mut outlier_mask = Array1::from_elem(n_samples, true);
243 for &idx in &best_subset {
244 outlier_mask[idx] = false;
245 }
246
247 Ok(outlier_mask)
248 }
249
250 fn detect_outliers_mcd(&self, x: &Array2<f64>, alpha: f64) -> Result<Array1<bool>> {
252 let n_samples = x.nrows();
253 let n_features = x.ncols();
254 let h = ((n_samples as f64 * alpha).floor() as usize).max(n_features + 1);
255
256 let mut best_determinant = f64::INFINITY;
257 let mut best_subset = Vec::new();
258 let mut rng = thread_rng();
259
260 for _ in 0..100 {
262 let mut subset: Vec<usize> = (0..n_samples).collect();
263 subset.shuffle(&mut rng);
264 subset.truncate(h);
265
266 for _ in 0..10 {
268 let subset_data = self.extract_subset(x, &subset);
269 let (mean, cov) = self.compute_robust_statistics(&subset_data);
270 let determinant = self.compute_determinant(&cov);
271
272 if determinant < best_determinant {
273 best_determinant = determinant;
274 best_subset = subset.clone();
275 }
276
277 let distances = self.compute_mahalanobis_distances(x, &mean, &cov);
279 let mut indexed_distances: Vec<(usize, f64)> =
280 distances.iter().enumerate().map(|(i, &d)| (i, d)).collect();
281 indexed_distances
282 .sort_by(|a, b| a.1.partial_cmp(&b.1).expect("operation should succeed"));
283
284 subset = indexed_distances.iter().take(h).map(|(i, _)| *i).collect();
285 }
286 }
287
288 let mut outlier_mask = Array1::from_elem(n_samples, true);
290 for &idx in &best_subset {
291 outlier_mask[idx] = false;
292 }
293
294 Ok(outlier_mask)
295 }
296
297 fn detect_outliers_huber(&self, x: &Array2<f64>, delta: f64) -> Result<Array1<bool>> {
299 let n_samples = x.nrows();
300 let mut outlier_mask = Array1::from_elem(n_samples, false);
301
302 let center = self.compute_huber_center(x, delta);
304 let scale = self.compute_huber_scale(x, ¢er, delta);
305
306 for i in 0..n_samples {
308 let distance = self.compute_huber_distance(&x.row(i), ¢er, delta);
309 if distance > 3.0 * scale {
310 outlier_mask[i] = true;
311 }
312 }
313
314 Ok(outlier_mask)
315 }
316
317 fn detect_outliers_tukey(&self, x: &Array2<f64>, c: f64) -> Result<Array1<bool>> {
319 let n_samples = x.nrows();
320 let mut outlier_mask = Array1::from_elem(n_samples, false);
321
322 let center = self.compute_tukey_center(x, c);
324 let scale = self.compute_tukey_scale(x, ¢er, c);
325
326 for i in 0..n_samples {
328 let distance = self.compute_tukey_distance(&x.row(i), ¢er, c);
329 if distance > 3.0 * scale {
330 outlier_mask[i] = true;
331 }
332 }
333
334 Ok(outlier_mask)
335 }
336
337 fn detect_outliers_mahalanobis(&self, x: &Array2<f64>) -> Result<Array1<bool>> {
339 let n_samples = x.nrows();
340 let mut outlier_mask = Array1::from_elem(n_samples, false);
341
342 let (mean, cov) = self.compute_robust_statistics(x);
344 let distances = self.compute_mahalanobis_distances(x, &mean, &cov);
345
346 let threshold = 3.0; for (i, &distance) in distances.iter().enumerate() {
350 if distance > threshold {
351 outlier_mask[i] = true;
352 }
353 }
354
355 Ok(outlier_mask)
356 }
357
    /// Derive per-sample weights from the configured robust loss, then
    /// zero out any samples flagged in `outlier_mask`.
    fn compute_robust_weights(&mut self, x: &Array2<f64>) -> Result<()> {
        let n_samples = x.nrows();
        let mut weights = Array1::ones(n_samples);

        if self.config.use_irls {
            // IRLS-style loop. NOTE(review): `compute_residual` does not
            // depend on the current weights, so the residuals are constant
            // across iterations and the loop effectively converges on the
            // second pass — confirm whether a weighted refit was intended.
            for _iteration in 0..self.config.max_iterations {
                let old_weights = weights.clone();

                for i in 0..n_samples {
                    let residual = self.compute_residual(&x.row(i), i);
                    weights[i] = self.compute_robust_weight(residual);
                }

                // Stop once the total (L1) weight change is negligible.
                let weight_change = (&weights - &old_weights).mapv(|x| x.abs()).sum();
                if weight_change < self.config.tolerance {
                    break;
                }
            }
        }

        // Hard-zero the weights of detected outliers.
        if let Some(outlier_mask) = &self.outlier_mask {
            for (i, &is_outlier) in outlier_mask.iter().enumerate() {
                if is_outlier {
                    weights[i] = 0.0;
                }
            }
        }

        self.robust_weights = Some(weights);
        Ok(())
    }
393
394 fn compute_residual(&self, sample: &ArrayView1<f64>, _index: usize) -> f64 {
396 sample.iter().map(|&x| x.abs()).sum::<f64>() / sample.len() as f64
398 }
399
400 fn compute_robust_weight(&self, residual: f64) -> f64 {
402 match &self.config.loss {
403 RobustLoss::Huber { delta } => {
404 if residual.abs() <= *delta {
405 1.0
406 } else {
407 *delta / residual.abs()
408 }
409 }
410 RobustLoss::Tukey { c } => {
411 let u = residual / *c;
412 if u.abs() <= 1.0 {
413 (1.0 - u * u).powi(2)
414 } else {
415 0.0
416 }
417 }
418 RobustLoss::Cauchy { sigma } => 1.0 / (1.0 + (residual / *sigma).powi(2)),
419 RobustLoss::Welsch { sigma } => (-(residual / *sigma).powi(2)).exp(),
420 RobustLoss::Fair { c } => *c / (residual.abs() + *c),
421 _ => 1.0, }
423 }
424
425 fn extract_subset(&self, x: &Array2<f64>, indices: &[usize]) -> Array2<f64> {
427 let n_features = x.ncols();
428 let mut subset = Array2::zeros((indices.len(), n_features));
429
430 for (i, &idx) in indices.iter().enumerate() {
431 subset.row_mut(i).assign(&x.row(idx));
432 }
433
434 subset
435 }
436
437 fn compute_robust_statistics(&self, x: &Array2<f64>) -> (Array1<f64>, Array2<f64>) {
439 let n_samples = x.nrows();
440 let n_features = x.ncols();
441
442 let mean = x.mean_axis(Axis(0)).expect("operation should succeed");
444
445 let mut cov = Array2::zeros((n_features, n_features));
447 for sample in x.rows() {
448 let centered = &sample - &mean;
449 for i in 0..n_features {
450 for j in 0..n_features {
451 cov[[i, j]] += centered[i] * centered[j];
452 }
453 }
454 }
455 cov /= (n_samples - 1) as f64;
456
457 (mean, cov)
458 }
459
460 fn compute_determinant(&self, matrix: &Array2<f64>) -> f64 {
462 if matrix.nrows() == 2 && matrix.ncols() == 2 {
464 matrix[[0, 0]] * matrix[[1, 1]] - matrix[[0, 1]] * matrix[[1, 0]]
465 } else {
466 matrix.diag().iter().product::<f64>()
468 }
469 }
470
471 fn compute_mahalanobis_distances(
473 &self,
474 x: &Array2<f64>,
475 mean: &Array1<f64>,
476 _cov: &Array2<f64>,
477 ) -> Array1<f64> {
478 let n_samples = x.nrows();
479 let mut distances = Array1::zeros(n_samples);
480
481 for (i, sample) in x.rows().into_iter().enumerate() {
483 let centered = &sample - mean;
484 let distance = centered.dot(¢ered).sqrt();
485 distances[i] = distance;
486 }
487
488 distances
489 }
490
491 fn compute_huber_center(&self, x: &Array2<f64>, _delta: f64) -> Array1<f64> {
493 x.mean_axis(Axis(0)).expect("operation should succeed")
495 }
496
497 fn compute_huber_scale(&self, x: &Array2<f64>, center: &Array1<f64>, _delta: f64) -> f64 {
499 let distances: Vec<f64> = x
501 .rows()
502 .into_iter()
503 .map(|row| (&row - center).mapv(|x| x.abs()).sum())
504 .collect();
505
506 let mut sorted_distances = distances;
507 sorted_distances.sort_by(|a, b| a.partial_cmp(b).expect("operation should succeed"));
508 sorted_distances[sorted_distances.len() / 2] }
510
511 fn compute_huber_distance(
513 &self,
514 sample: &ArrayView1<f64>,
515 center: &Array1<f64>,
516 _delta: f64,
517 ) -> f64 {
518 (sample - center).mapv(|x| x.abs()).sum()
519 }
520
521 fn compute_tukey_center(&self, x: &Array2<f64>, _c: f64) -> Array1<f64> {
523 x.mean_axis(Axis(0)).expect("operation should succeed")
525 }
526
527 fn compute_tukey_scale(&self, x: &Array2<f64>, center: &Array1<f64>, _c: f64) -> f64 {
529 let distances: Vec<f64> = x
531 .rows()
532 .into_iter()
533 .map(|row| (&row - center).mapv(|x| x.abs()).sum())
534 .collect();
535
536 let mut sorted_distances = distances;
537 sorted_distances.sort_by(|a, b| a.partial_cmp(b).expect("operation should succeed"));
538 sorted_distances[sorted_distances.len() / 2] }
540
541 fn compute_tukey_distance(
543 &self,
544 sample: &ArrayView1<f64>,
545 center: &Array1<f64>,
546 _c: f64,
547 ) -> f64 {
548 (sample - center).mapv(|x| x.abs()).sum()
549 }
550
    /// Outlier mask computed during `fit` (`None` before fitting).
    pub fn get_outlier_mask(&self) -> Option<&Array1<bool>> {
        self.outlier_mask.as_ref()
    }

    /// Robust per-sample weights computed during `fit` (`None` before fitting).
    pub fn get_robust_weights(&self) -> Option<&Array1<f64>> {
        self.robust_weights.as_ref()
    }
560}
561
/// Nystroem kernel approximation that reuses `RobustRBFSampler`'s outlier
/// detection and weighting when choosing basis points.
pub struct RobustNystroem {
    // Target number of basis points / output features.
    n_components: usize,
    // RBF bandwidth parameter.
    gamma: f64,
    // Robustness configuration (shared with the delegated RBF sampler).
    config: RobustKernelConfig,
    // Indices (into the training data) of the sampled basis points.
    basis_indices: Option<Vec<usize>>,
    // NOTE(review): never populated by the visible code — the basis rows
    // are not retained after `fit`; see `compute_robust_kernel_values`.
    basis_kernel: Option<Array2<f64>>,
    // Normalization operator applied in `transform`.
    normalization: Option<Array2<f64>>,
    // Robust weights copied from the delegated RBF sampler.
    robust_weights: Option<Array1<f64>>,
    // Outlier mask copied from the delegated RBF sampler.
    outlier_mask: Option<Array1<bool>>,
}
573
574impl RobustNystroem {
575 pub fn new(n_components: usize) -> Self {
577 Self {
578 n_components,
579 gamma: 1.0,
580 config: RobustKernelConfig::default(),
581 basis_indices: None,
582 basis_kernel: None,
583 normalization: None,
584 robust_weights: None,
585 outlier_mask: None,
586 }
587 }
588
589 pub fn gamma(mut self, gamma: f64) -> Self {
591 self.gamma = gamma;
592 self
593 }
594
595 pub fn with_config(mut self, config: RobustKernelConfig) -> Self {
597 self.config = config;
598 self
599 }
600
    /// Fit the robust Nystroem approximation: delegate outlier detection and
    /// weighting to `RobustRBFSampler`, sample basis points from the inliers,
    /// then build the normalization operator.
    pub fn fit(&mut self, x: &Array2<f64>) -> Result<()> {
        // Delegate outlier detection / robust weighting.
        let mut robust_rbf = RobustRBFSampler::new(self.n_components)
            .gamma(self.gamma)
            .with_config(self.config.clone());

        robust_rbf.fit(x)?;
        self.outlier_mask = robust_rbf.get_outlier_mask().cloned();
        self.robust_weights = robust_rbf.get_robust_weights().cloned();

        // Choose basis points among the non-outlying samples.
        self.sample_robust_basis_points(x)?;

        // Weighted RBF Gram matrix over the sampled basis points.
        // NOTE(review): the basis *rows* are not retained (`basis_kernel`
        // stays `None`), which is why `compute_robust_kernel_values` cannot
        // evaluate the true kernel at transform time.
        let basis_points = self.get_basis_points(x);
        let kernel_matrix = self.compute_robust_kernel_matrix(&basis_points)?;

        // NOTE(review): placeholder eigendecomposition — see that method.
        let (eigenvalues, eigenvectors) = self.compute_eigendecomposition(&kernel_matrix)?;

        self.normalization = Some(self.compute_normalization(&eigenvalues, &eigenvectors)?);

        Ok(())
    }
627
628 pub fn transform(&self, x: &Array2<f64>) -> Result<Array2<f64>> {
630 let normalization = self.normalization.as_ref().ok_or("Model not fitted")?;
631
632 let kernel_values = self.compute_robust_kernel_values(x)?;
634
635 let result = kernel_values.dot(normalization);
637
638 Ok(result)
639 }
640
641 fn sample_robust_basis_points(&mut self, x: &Array2<f64>) -> Result<()> {
643 let n_samples = x.nrows();
644 let mut indices = Vec::new();
645 let mut rng = if let Some(seed) = self.config.random_state {
646 RealStdRng::seed_from_u64(seed)
647 } else {
648 RealStdRng::from_seed(thread_rng().random())
649 };
650
651 let available_indices: Vec<usize> = if let Some(outlier_mask) = &self.outlier_mask {
653 (0..n_samples).filter(|&i| !outlier_mask[i]).collect()
654 } else {
655 (0..n_samples).collect()
656 };
657
658 let n_available = available_indices.len();
660 let n_basis = std::cmp::min(self.n_components, n_available);
661
662 for _ in 0..n_basis {
663 let idx = rng.random_range(0..n_available);
664 indices.push(available_indices[idx]);
665 }
666
667 self.basis_indices = Some(indices);
668 Ok(())
669 }
670
671 fn get_basis_points(&self, x: &Array2<f64>) -> Array2<f64> {
673 let indices = self
674 .basis_indices
675 .as_ref()
676 .expect("operation should succeed");
677 let n_features = x.ncols();
678 let mut basis_points = Array2::zeros((indices.len(), n_features));
679
680 for (i, &idx) in indices.iter().enumerate() {
681 basis_points.row_mut(i).assign(&x.row(idx));
682 }
683
684 basis_points
685 }
686
687 fn compute_robust_kernel_matrix(&self, basis_points: &Array2<f64>) -> Result<Array2<f64>> {
689 let n_basis = basis_points.nrows();
690 let mut kernel_matrix = Array2::zeros((n_basis, n_basis));
691
692 for i in 0..n_basis {
693 for j in i..n_basis {
694 let dist_sq = basis_points
695 .row(i)
696 .iter()
697 .zip(basis_points.row(j).iter())
698 .map(|(&a, &b)| (a - b).powi(2))
699 .sum::<f64>();
700
701 let kernel_value = (-self.gamma * dist_sq).exp();
702 kernel_matrix[[i, j]] = kernel_value;
703 kernel_matrix[[j, i]] = kernel_value;
704 }
705 }
706
707 if let Some(weights) = &self.robust_weights {
709 let indices = self
710 .basis_indices
711 .as_ref()
712 .expect("operation should succeed");
713 for (i, &idx_i) in indices.iter().enumerate() {
714 for (j, &idx_j) in indices.iter().enumerate() {
715 kernel_matrix[[i, j]] *= (weights[idx_i] * weights[idx_j]).sqrt();
716 }
717 }
718 }
719
720 Ok(kernel_matrix)
721 }
722
    /// Placeholder eigendecomposition.
    ///
    /// NOTE(review): returns unit eigenvalues and the kernel matrix itself
    /// as "eigenvectors" — no decomposition is actually performed, so the
    /// downstream normalization is not a true Nystroem normalization.
    fn compute_eigendecomposition(
        &self,
        kernel_matrix: &Array2<f64>,
    ) -> Result<(Array1<f64>, Array2<f64>)> {
        let n = kernel_matrix.nrows();
        let eigenvalues = Array1::ones(n);
        let eigenvectors = kernel_matrix.clone();

        Ok((eigenvalues, eigenvectors))
    }
735
736 fn compute_normalization(
738 &self,
739 eigenvalues: &Array1<f64>,
740 eigenvectors: &Array2<f64>,
741 ) -> Result<Array2<f64>> {
742 let mut normalization = Array2::zeros(eigenvectors.dim());
743
744 for i in 0..eigenvectors.nrows() {
745 for j in 0..eigenvectors.ncols() {
746 if eigenvalues[j] > 1e-10 {
747 normalization[[i, j]] = eigenvectors[[i, j]] / eigenvalues[j].sqrt();
748 }
749 }
750 }
751
752 Ok(normalization)
753 }
754
755 fn compute_robust_kernel_values(&self, x: &Array2<f64>) -> Result<Array2<f64>> {
757 let indices = self
758 .basis_indices
759 .as_ref()
760 .expect("operation should succeed");
761 let n_samples = x.nrows();
762 let n_basis = indices.len();
763 let mut kernel_values = Array2::zeros((n_samples, n_basis));
764
765 for i in 0..n_samples {
766 for (j, &_basis_idx) in indices.iter().enumerate() {
767 kernel_values[[i, j]] = thread_rng().gen_range(0.0..1.0);
769 }
770 }
771
772 Ok(kernel_values)
773 }
774
    /// Outlier mask copied from the delegated sampler (`None` before fitting).
    pub fn get_outlier_mask(&self) -> Option<&Array1<bool>> {
        self.outlier_mask.as_ref()
    }

    /// Robust weights copied from the delegated sampler (`None` before fitting).
    pub fn get_robust_weights(&self) -> Option<&Array1<f64>> {
        self.robust_weights.as_ref()
    }
784}
785
/// Empirical breakdown-point analysis: probes increasing contamination
/// levels until the estimate's bias exceeds a threshold.
pub struct BreakdownPointAnalysis {
    // Estimator under analysis (only used as the result-map key so far).
    estimator: RobustEstimator,
    // Contamination fractions probed in order (1%..=50% by default).
    contamination_levels: Vec<f64>,
    // Estimator name (via `Debug`) -> measured breakdown point.
    breakdown_points: HashMap<String, f64>,
}
792
impl Default for BreakdownPointAnalysis {
    /// Equivalent to [`BreakdownPointAnalysis::new`].
    fn default() -> Self {
        Self::new()
    }
}
798
799impl BreakdownPointAnalysis {
800 pub fn new() -> Self {
802 Self {
803 estimator: RobustEstimator::Huber { delta: 1.345 },
804 contamination_levels: (1..=50).map(|x| x as f64 / 100.0).collect(),
805 breakdown_points: HashMap::new(),
806 }
807 }
808
809 pub fn with_estimator(mut self, estimator: RobustEstimator) -> Self {
811 self.estimator = estimator;
812 self
813 }
814
815 pub fn analyze(&mut self, x: &Array2<f64>) -> Result<f64> {
817 let mut breakdown_point = 0.0;
818
819 for &contamination in &self.contamination_levels {
820 let contaminated_data = self.add_contamination(x, contamination);
821 let bias = self.compute_bias(&contaminated_data, x)?;
822
823 if bias > 0.5 {
824 breakdown_point = contamination;
826 break;
827 }
828 }
829
830 let estimator_name = format!("{:?}", self.estimator);
831 self.breakdown_points
832 .insert(estimator_name, breakdown_point);
833
834 Ok(breakdown_point)
835 }
836
837 fn add_contamination(&self, x: &Array2<f64>, contamination: f64) -> Array2<f64> {
839 let n_samples = x.nrows();
840 let n_contaminated = (n_samples as f64 * contamination) as usize;
841 let mut contaminated = x.clone();
842 let mut rng = thread_rng();
843
844 for _ in 0..n_contaminated {
846 let idx = rng.random_range(0..n_samples);
847 for j in 0..x.ncols() {
848 contaminated[[idx, j]] += rng.random_range(-10.0..10.0);
849 }
850 }
851
852 contaminated
853 }
854
855 fn compute_bias(&self, contaminated: &Array2<f64>, original: &Array2<f64>) -> Result<f64> {
857 let original_mean = original
859 .mean_axis(Axis(0))
860 .expect("operation should succeed");
861 let contaminated_mean = contaminated
862 .mean_axis(Axis(0))
863 .expect("operation should succeed");
864
865 let bias = (&contaminated_mean - &original_mean)
866 .mapv(|x| x.abs())
867 .sum();
868 Ok(bias)
869 }
870
    /// All breakdown points measured so far, keyed by estimator debug name.
    pub fn get_breakdown_points(&self) -> &HashMap<String, f64> {
        &self.breakdown_points
    }
875}
876
/// Leave-one-out influence, leverage and Cook-style distance diagnostics.
pub struct InfluenceFunctionDiagnostics {
    // Estimator selection. NOTE(review): not consulted by the visible
    // computation, which uses a plain-mean placeholder estimate.
    estimator: RobustEstimator,
    // |estimate(all) - estimate(without i)| per sample; set by `compute`.
    influence_values: Option<Array1<f64>>,
    // Squared distance of each sample from the mean; set by `compute`.
    leverage_values: Option<Array1<f64>>,
    // influence * leverage per sample; set by `compute`.
    cook_distances: Option<Array1<f64>>,
}
884
impl Default for InfluenceFunctionDiagnostics {
    /// Equivalent to [`InfluenceFunctionDiagnostics::new`].
    fn default() -> Self {
        Self::new()
    }
}
890
891impl InfluenceFunctionDiagnostics {
892 pub fn new() -> Self {
894 Self {
895 estimator: RobustEstimator::Huber { delta: 1.345 },
896 influence_values: None,
897 leverage_values: None,
898 cook_distances: None,
899 }
900 }
901
902 pub fn with_estimator(mut self, estimator: RobustEstimator) -> Self {
904 self.estimator = estimator;
905 self
906 }
907
908 pub fn compute(&mut self, x: &Array2<f64>, y: &Array1<f64>) -> Result<()> {
910 self.influence_values = Some(self.compute_influence_values(x, y)?);
911 self.leverage_values = Some(self.compute_leverage_values(x)?);
912 self.cook_distances = Some(self.compute_cook_distances(x, y)?);
913
914 Ok(())
915 }
916
917 fn compute_influence_values(&self, x: &Array2<f64>, y: &Array1<f64>) -> Result<Array1<f64>> {
919 let n_samples = x.nrows();
920 let mut influence = Array1::zeros(n_samples);
921
922 let baseline_estimate = self.compute_robust_estimate(x, y)?;
924
925 for i in 0..n_samples {
927 let (x_reduced, y_reduced) = self.remove_observation(x, y, i);
928 let reduced_estimate = self.compute_robust_estimate(&x_reduced, &y_reduced)?;
929 influence[i] = (baseline_estimate - reduced_estimate).abs();
930 }
931
932 Ok(influence)
933 }
934
935 fn compute_leverage_values(&self, x: &Array2<f64>) -> Result<Array1<f64>> {
937 let n_samples = x.nrows();
938 let mut leverage = Array1::zeros(n_samples);
939
940 let (mean, _cov) = self.compute_robust_statistics(x);
942
943 for i in 0..n_samples {
944 let centered = &x.row(i) - &mean;
945 leverage[i] = centered.dot(¢ered); }
947
948 Ok(leverage)
949 }
950
951 fn compute_cook_distances(&self, x: &Array2<f64>, y: &Array1<f64>) -> Result<Array1<f64>> {
953 let n_samples = x.nrows();
954 let mut cook_distances = Array1::zeros(n_samples);
955
956 let influence = self.compute_influence_values(x, y)?;
958 let leverage = self.compute_leverage_values(x)?;
959
960 for i in 0..n_samples {
961 cook_distances[i] = influence[i] * leverage[i];
962 }
963
964 Ok(cook_distances)
965 }
966
967 fn remove_observation(
969 &self,
970 x: &Array2<f64>,
971 y: &Array1<f64>,
972 index: usize,
973 ) -> (Array2<f64>, Array1<f64>) {
974 let n_samples = x.nrows();
975 let n_features = x.ncols();
976
977 let mut x_reduced = Array2::zeros((n_samples - 1, n_features));
978 let mut y_reduced = Array1::zeros(n_samples - 1);
979
980 let mut reduced_idx = 0;
981 for i in 0..n_samples {
982 if i != index {
983 x_reduced.row_mut(reduced_idx).assign(&x.row(i));
984 y_reduced[reduced_idx] = y[i];
985 reduced_idx += 1;
986 }
987 }
988
989 (x_reduced, y_reduced)
990 }
991
992 fn compute_robust_estimate(&self, _x: &Array2<f64>, y: &Array1<f64>) -> Result<f64> {
994 Ok(y.mean().expect("operation should succeed"))
996 }
997
998 fn compute_robust_statistics(&self, x: &Array2<f64>) -> (Array1<f64>, Array2<f64>) {
1000 let n_samples = x.nrows();
1001 let n_features = x.ncols();
1002
1003 let mean = x.mean_axis(Axis(0)).expect("operation should succeed");
1004 let mut cov = Array2::zeros((n_features, n_features));
1005
1006 for sample in x.rows() {
1007 let centered = &sample - &mean;
1008 for i in 0..n_features {
1009 for j in 0..n_features {
1010 cov[[i, j]] += centered[i] * centered[j];
1011 }
1012 }
1013 }
1014 cov /= (n_samples - 1) as f64;
1015
1016 (mean, cov)
1017 }
1018
    /// Influence values from the last `compute` call (`None` before).
    pub fn get_influence_values(&self) -> Option<&Array1<f64>> {
        self.influence_values.as_ref()
    }

    /// Leverage values from the last `compute` call (`None` before).
    pub fn get_leverage_values(&self) -> Option<&Array1<f64>> {
        self.leverage_values.as_ref()
    }

    /// Cook-style distances from the last `compute` call (`None` before).
    pub fn get_cook_distances(&self) -> Option<&Array1<f64>> {
        self.cook_distances.as_ref()
    }
1033}
1034
1035#[allow(non_snake_case)]
1036#[cfg(test)]
1037mod tests {
1038 use super::*;
1039 use scirs2_core::ndarray::Array2;
1040
    // Smoke test: fit + transform produce the expected feature shape and
    // populate both the outlier mask and the robust weights.
    #[test]
    fn test_robust_rbf_sampler() {
        let x = Array2::from_shape_vec((10, 3), (0..30).map(|i| i as f64).collect())
            .expect("operation should succeed");

        let mut robust_rbf = RobustRBFSampler::new(50)
            .gamma(1.0)
            .with_config(RobustKernelConfig::default());

        robust_rbf.fit(&x).expect("operation should succeed");
        let transformed = robust_rbf.transform(&x).expect("operation should succeed");

        assert_eq!(transformed.shape(), &[10, 50]);
        assert!(robust_rbf.get_outlier_mask().is_some());
        assert!(robust_rbf.get_robust_weights().is_some());
    }
1057
    // Every estimator variant with a dedicated detection routine must fit
    // without error on data containing two gross outliers.
    #[test]
    fn test_robust_estimators() {
        let estimators = vec![
            RobustEstimator::Huber { delta: 1.345 },
            RobustEstimator::Tukey { c: 4.685 },
            RobustEstimator::MVE { alpha: 0.5 },
            RobustEstimator::MCD { alpha: 0.5 },
        ];

        // Rows 4 and 5 (100s) are deliberate outliers.
        let x = Array2::from_shape_vec(
            (8, 2),
            vec![
                1.0, 2.0, 2.0, 3.0, 3.0, 4.0, 4.0, 5.0, 100.0, 100.0, 101.0, 101.0, 5.0, 6.0, 6.0,
                7.0,
            ],
        )
        .expect("operation should succeed");

        for estimator in estimators {
            let config = RobustKernelConfig {
                estimator,
                ..Default::default()
            };

            let mut robust_rbf = RobustRBFSampler::new(10).with_config(config);

            let result = robust_rbf.fit(&x);
            assert!(result.is_ok());
        }
    }
1088
    // Smoke test: robust Nystroem fit + transform produce the expected
    // output shape and expose the delegated mask/weights.
    #[test]
    fn test_robust_nystroem() {
        let x = Array2::from_shape_vec((10, 3), (0..30).map(|i| i as f64).collect())
            .expect("operation should succeed");

        let mut robust_nystroem = RobustNystroem::new(5)
            .gamma(1.0)
            .with_config(RobustKernelConfig::default());

        robust_nystroem.fit(&x).expect("operation should succeed");
        let transformed = robust_nystroem
            .transform(&x)
            .expect("operation should succeed");

        assert_eq!(transformed.shape(), &[10, 5]);
        assert!(robust_nystroem.get_outlier_mask().is_some());
        assert!(robust_nystroem.get_robust_weights().is_some());
    }
1107
    // The measured breakdown point must be a valid fraction and must be
    // recorded in the analyzer's result map.
    #[test]
    fn test_breakdown_point_analysis() {
        let x = Array2::from_shape_vec((20, 2), (0..40).map(|i| i as f64).collect())
            .expect("operation should succeed");

        let mut analysis =
            BreakdownPointAnalysis::new().with_estimator(RobustEstimator::Huber { delta: 1.345 });

        let breakdown_point = analysis.analyze(&x).expect("operation should succeed");
        assert!(breakdown_point >= 0.0 && breakdown_point <= 1.0);

        let breakdown_points = analysis.get_breakdown_points();
        assert!(!breakdown_points.is_empty());
    }
1122
    // `compute` must populate all three diagnostics, with one influence
    // value per sample.
    #[test]
    fn test_influence_function_diagnostics() {
        let x = Array2::from_shape_vec((10, 2), (0..20).map(|i| i as f64).collect())
            .expect("operation should succeed");
        let y = Array1::from_vec((0..10).map(|i| i as f64).collect());

        let mut diagnostics = InfluenceFunctionDiagnostics::new()
            .with_estimator(RobustEstimator::Huber { delta: 1.345 });

        diagnostics
            .compute(&x, &y)
            .expect("operation should succeed");

        assert!(diagnostics.get_influence_values().is_some());
        assert!(diagnostics.get_leverage_values().is_some());
        assert!(diagnostics.get_cook_distances().is_some());

        let influence = diagnostics
            .get_influence_values()
            .expect("operation should succeed");
        assert_eq!(influence.len(), 10);
    }
1145
    // Every loss variant with an explicit weighting rule must fit cleanly.
    #[test]
    fn test_robust_loss_functions() {
        let losses = vec![
            RobustLoss::Huber { delta: 1.345 },
            RobustLoss::Tukey { c: 4.685 },
            RobustLoss::Cauchy { sigma: 1.0 },
            RobustLoss::Welsch { sigma: 1.0 },
            RobustLoss::Fair { c: 1.0 },
        ];

        let x = Array2::from_shape_vec((8, 2), (0..16).map(|i| i as f64).collect())
            .expect("operation should succeed");

        for loss in losses {
            let config = RobustKernelConfig {
                loss,
                ..Default::default()
            };

            let mut robust_rbf = RobustRBFSampler::new(10).with_config(config);

            let result = robust_rbf.fit(&x);
            assert!(result.is_ok());
        }
    }
1171
    // Planting two extreme rows must cause at least one of them to be
    // flagged by the default outlier detection.
    #[test]
    fn test_contamination_resistance() {
        let mut x = Array2::from_shape_vec((20, 2), (0..40).map(|i| i as f64 / 10.0).collect())
            .expect("operation should succeed");

        // Rows 18 and 19 become gross outliers.
        x[[18, 0]] = 100.0;
        x[[18, 1]] = 100.0;
        x[[19, 0]] = -100.0;
        x[[19, 1]] = -100.0;

        let config = RobustKernelConfig {
            contamination: 0.2,
            ..Default::default()
        };

        let mut robust_rbf = RobustRBFSampler::new(10).with_config(config);

        robust_rbf.fit(&x).expect("operation should succeed");

        let outlier_mask = robust_rbf
            .get_outlier_mask()
            .expect("operation should succeed");
        assert!(outlier_mask[18] || outlier_mask[19]);
    }
1198}