1use rayon::prelude::*;
8use scirs2_core::ndarray::{Array1, Array2, ArrayView1, Axis};
9use scirs2_core::random::essentials::Normal as RandNormal;
10use scirs2_core::random::rngs::StdRng as RealStdRng;
11use scirs2_core::random::seq::SliceRandom;
12use scirs2_core::random::Distribution;
13use scirs2_core::random::{thread_rng, Rng, SeedableRng};
14use sklears_core::error::Result;
15use sklears_core::traits::Fit;
16use std::collections::HashMap;
17
/// Robust statistical estimators available for outlier detection.
///
/// Variants with a dedicated detection routine in this file: `Huber`,
/// `Tukey`, `MVE`, `MCD`; all others fall back to the distance-based
/// detector (see `RobustRBFSampler::detect_outliers`).
#[derive(Clone, Debug, PartialEq)]
pub enum RobustEstimator {
    /// Huber M-estimator; `delta` is the quadratic/linear transition point.
    Huber { delta: f64 },
    /// Tukey biweight; `c` is the cutoff constant beyond which weight is zero.
    Tukey { c: f64 },
    /// Hampel three-part estimator with breakpoints `a`, `b`, `c`.
    /// NOTE(review): not specially handled in this file — uses the fallback detector.
    Hampel { a: f64, b: f64, c: f64 },
    /// Minimum Volume Ellipsoid; `alpha` is the subset fraction kept as inliers.
    MVE { alpha: f64 },
    /// Minimum Covariance Determinant; `alpha` is the subset fraction kept as inliers.
    MCD { alpha: f64 },
    /// S-estimator with the given breakdown point (fallback detector in this file).
    SEstimator { breakdown_point: f64 },
    /// MM-estimator targeting the given efficiency (fallback detector in this file).
    MMEstimator { efficiency: f64 },
    /// Least absolute deviations (fallback detector in this file).
    L1,
    /// Quantile estimator at level `tau` (fallback detector in this file).
    Quantile { tau: f64 },
}
41
/// Robust loss functions used to derive per-sample weights
/// (see `RobustRBFSampler::compute_robust_weight`).
///
/// Variants with an implemented weight function: `Huber`, `Tukey`, `Cauchy`,
/// `Welsch`, `Fair`; the rest currently yield a constant weight of 1.0.
#[derive(Clone, Debug, PartialEq)]
pub enum RobustLoss {
    /// Huber loss; weight is 1 for |r| <= delta, else delta/|r|.
    Huber { delta: f64 },
    /// Epsilon-insensitive loss (NOTE(review): weight not implemented — constant 1.0).
    EpsilonInsensitive { epsilon: f64 },
    /// Tukey biweight; weight is (1 - (r/c)^2)^2 inside |r| <= c, else 0.
    Tukey { c: f64 },
    /// Cauchy loss; weight is 1 / (1 + (r/sigma)^2).
    Cauchy { sigma: f64 },
    /// Welsch loss; weight is exp(-(r/sigma)^2).
    Welsch { sigma: f64 },
    /// Fair loss; weight is c / (|r| + c).
    Fair { c: f64 },
    /// Logistic loss (NOTE(review): weight not implemented — constant 1.0).
    Logistic { alpha: f64 },
    /// Quantile loss at level `tau` (NOTE(review): weight not implemented — constant 1.0).
    Quantile { tau: f64 },
}
63
/// Configuration shared by the robust kernel approximation transformers.
#[derive(Clone, Debug)]
pub struct RobustKernelConfig {
    /// Estimator used for outlier detection.
    pub estimator: RobustEstimator,
    /// Loss function used to derive per-sample robustness weights.
    pub loss: RobustLoss,
    /// Maximum number of IRLS iterations when `use_irls` is set.
    pub max_iterations: usize,
    /// Convergence tolerance on the total change in weights between IRLS iterations.
    pub tolerance: f64,
    /// Expected fraction of contaminated samples.
    /// NOTE(review): currently not read by any routine in this file.
    pub contamination: f64,
    /// Whether to iterate the reweighting loop when fitting robust weights.
    pub use_irls: bool,
    /// Optional RNG seed; `None` seeds from the thread-local RNG.
    pub random_state: Option<u64>,
}
83
84impl Default for RobustKernelConfig {
85 fn default() -> Self {
86 Self {
87 estimator: RobustEstimator::Huber { delta: 1.345 },
88 loss: RobustLoss::Huber { delta: 1.345 },
89 max_iterations: 100,
90 tolerance: 1e-6,
91 contamination: 0.1,
92 use_irls: true,
93 random_state: None,
94 }
95 }
96}
97
/// Random Fourier feature approximation of the RBF kernel with outlier
/// detection and robust per-sample weighting.
pub struct RobustRBFSampler {
    // Number of random Fourier components (output dimensionality).
    n_components: usize,
    // RBF kernel width parameter.
    gamma: f64,
    // Robust-estimation configuration.
    config: RobustKernelConfig,
    // (n_features, n_components) random projection matrix; set by `fit`.
    random_weights: Option<Array2<f64>>,
    // Per-component phase offsets in [0, 2*pi); set by `fit`.
    random_offset: Option<Array1<f64>>,
    // Per-sample robustness weights; set by `fit`.
    robust_weights: Option<Array1<f64>>,
    // Per-sample outlier flags (true = outlier); set by `fit`.
    outlier_mask: Option<Array1<bool>>,
}
108
109impl RobustRBFSampler {
110 pub fn new(n_components: usize) -> Self {
112 Self {
113 n_components,
114 gamma: 1.0,
115 config: RobustKernelConfig::default(),
116 random_weights: None,
117 random_offset: None,
118 robust_weights: None,
119 outlier_mask: None,
120 }
121 }
122
123 pub fn gamma(mut self, gamma: f64) -> Self {
125 self.gamma = gamma;
126 self
127 }
128
129 pub fn with_config(mut self, config: RobustKernelConfig) -> Self {
131 self.config = config;
132 self
133 }
134
135 pub fn fit(&mut self, x: &Array2<f64>) -> Result<()> {
137 let n_features = x.ncols();
138
139 let normal = RandNormal::new(0.0, (2.0 * self.gamma).sqrt()).unwrap();
141 let mut rng = if let Some(seed) = self.config.random_state {
142 RealStdRng::seed_from_u64(seed)
143 } else {
144 RealStdRng::from_seed(thread_rng().gen())
145 };
146
147 self.random_weights = Some(Array2::from_shape_fn(
148 (n_features, self.n_components),
149 |_| rng.sample(normal),
150 ));
151
152 self.random_offset = Some(Array1::from_shape_fn(self.n_components, |_| {
153 rng.gen_range(0.0..2.0 * std::f64::consts::PI)
154 }));
155
156 self.detect_outliers(x)?;
158 self.compute_robust_weights(x)?;
159
160 Ok(())
161 }
162
163 pub fn transform(&self, x: &Array2<f64>) -> Result<Array2<f64>> {
165 let weights = self.random_weights.as_ref().ok_or("Model not fitted")?;
166 let offset = self.random_offset.as_ref().ok_or("Model not fitted")?;
167
168 let projection = x.dot(weights) + offset;
170
171 let mut transformed =
173 projection.mapv(|x| (x * (2.0 / self.n_components as f64).sqrt()).cos());
174
175 if let Some(robust_weights) = &self.robust_weights {
177 for (mut row, &weight) in transformed
178 .rows_mut()
179 .into_iter()
180 .zip(robust_weights.iter())
181 {
182 row *= weight;
183 }
184 }
185
186 Ok(transformed)
187 }
188
189 fn detect_outliers(&mut self, x: &Array2<f64>) -> Result<()> {
191 match &self.config.estimator {
192 RobustEstimator::MVE { alpha } => {
193 self.outlier_mask = Some(self.detect_outliers_mve(x, *alpha)?);
194 }
195 RobustEstimator::MCD { alpha } => {
196 self.outlier_mask = Some(self.detect_outliers_mcd(x, *alpha)?);
197 }
198 RobustEstimator::Huber { delta } => {
199 self.outlier_mask = Some(self.detect_outliers_huber(x, *delta)?);
200 }
201 RobustEstimator::Tukey { c } => {
202 self.outlier_mask = Some(self.detect_outliers_tukey(x, *c)?);
203 }
204 _ => {
205 self.outlier_mask = Some(self.detect_outliers_mahalanobis(x)?);
207 }
208 }
209
210 Ok(())
211 }
212
213 fn detect_outliers_mve(&self, x: &Array2<f64>, alpha: f64) -> Result<Array1<bool>> {
215 let n_samples = x.nrows();
216 let n_features = x.ncols();
217 let h = ((n_samples as f64 * alpha).floor() as usize).max(n_features + 1);
218
219 let mut best_volume = f64::INFINITY;
220 let mut best_subset = Vec::new();
221 let mut rng = thread_rng();
222
223 for _ in 0..100 {
225 let mut subset: Vec<usize> = (0..n_samples).collect();
226 subset.shuffle(&mut rng);
227 subset.truncate(h);
228
229 let subset_data = self.extract_subset(x, &subset);
231 let (mean, cov) = self.compute_robust_statistics(&subset_data);
232
233 let volume = self.compute_determinant(&cov);
235
236 if volume < best_volume {
237 best_volume = volume;
238 best_subset = subset;
239 }
240 }
241
242 let mut outlier_mask = Array1::from_elem(n_samples, true);
244 for &idx in &best_subset {
245 outlier_mask[idx] = false;
246 }
247
248 Ok(outlier_mask)
249 }
250
251 fn detect_outliers_mcd(&self, x: &Array2<f64>, alpha: f64) -> Result<Array1<bool>> {
253 let n_samples = x.nrows();
254 let n_features = x.ncols();
255 let h = ((n_samples as f64 * alpha).floor() as usize).max(n_features + 1);
256
257 let mut best_determinant = f64::INFINITY;
258 let mut best_subset = Vec::new();
259 let mut rng = thread_rng();
260
261 for _ in 0..100 {
263 let mut subset: Vec<usize> = (0..n_samples).collect();
264 subset.shuffle(&mut rng);
265 subset.truncate(h);
266
267 for _ in 0..10 {
269 let subset_data = self.extract_subset(x, &subset);
270 let (mean, cov) = self.compute_robust_statistics(&subset_data);
271 let determinant = self.compute_determinant(&cov);
272
273 if determinant < best_determinant {
274 best_determinant = determinant;
275 best_subset = subset.clone();
276 }
277
278 let distances = self.compute_mahalanobis_distances(x, &mean, &cov);
280 let mut indexed_distances: Vec<(usize, f64)> =
281 distances.iter().enumerate().map(|(i, &d)| (i, d)).collect();
282 indexed_distances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap());
283
284 subset = indexed_distances.iter().take(h).map(|(i, _)| *i).collect();
285 }
286 }
287
288 let mut outlier_mask = Array1::from_elem(n_samples, true);
290 for &idx in &best_subset {
291 outlier_mask[idx] = false;
292 }
293
294 Ok(outlier_mask)
295 }
296
297 fn detect_outliers_huber(&self, x: &Array2<f64>, delta: f64) -> Result<Array1<bool>> {
299 let n_samples = x.nrows();
300 let mut outlier_mask = Array1::from_elem(n_samples, false);
301
302 let center = self.compute_huber_center(x, delta);
304 let scale = self.compute_huber_scale(x, ¢er, delta);
305
306 for i in 0..n_samples {
308 let distance = self.compute_huber_distance(&x.row(i), ¢er, delta);
309 if distance > 3.0 * scale {
310 outlier_mask[i] = true;
311 }
312 }
313
314 Ok(outlier_mask)
315 }
316
317 fn detect_outliers_tukey(&self, x: &Array2<f64>, c: f64) -> Result<Array1<bool>> {
319 let n_samples = x.nrows();
320 let mut outlier_mask = Array1::from_elem(n_samples, false);
321
322 let center = self.compute_tukey_center(x, c);
324 let scale = self.compute_tukey_scale(x, ¢er, c);
325
326 for i in 0..n_samples {
328 let distance = self.compute_tukey_distance(&x.row(i), ¢er, c);
329 if distance > 3.0 * scale {
330 outlier_mask[i] = true;
331 }
332 }
333
334 Ok(outlier_mask)
335 }
336
337 fn detect_outliers_mahalanobis(&self, x: &Array2<f64>) -> Result<Array1<bool>> {
339 let n_samples = x.nrows();
340 let mut outlier_mask = Array1::from_elem(n_samples, false);
341
342 let (mean, cov) = self.compute_robust_statistics(x);
344 let distances = self.compute_mahalanobis_distances(x, &mean, &cov);
345
346 let threshold = 3.0; for (i, &distance) in distances.iter().enumerate() {
350 if distance > threshold {
351 outlier_mask[i] = true;
352 }
353 }
354
355 Ok(outlier_mask)
356 }
357
358 fn compute_robust_weights(&mut self, x: &Array2<f64>) -> Result<()> {
360 let n_samples = x.nrows();
361 let mut weights = Array1::ones(n_samples);
362
363 if self.config.use_irls {
364 for iteration in 0..self.config.max_iterations {
365 let old_weights = weights.clone();
366
367 for i in 0..n_samples {
369 let residual = self.compute_residual(&x.row(i), i);
370 weights[i] = self.compute_robust_weight(residual);
371 }
372
373 let weight_change = (&weights - &old_weights).mapv(|x| x.abs()).sum();
375 if weight_change < self.config.tolerance {
376 break;
377 }
378 }
379 }
380
381 if let Some(outlier_mask) = &self.outlier_mask {
383 for (i, &is_outlier) in outlier_mask.iter().enumerate() {
384 if is_outlier {
385 weights[i] = 0.0;
386 }
387 }
388 }
389
390 self.robust_weights = Some(weights);
391 Ok(())
392 }
393
394 fn compute_residual(&self, sample: &ArrayView1<f64>, index: usize) -> f64 {
396 sample.iter().map(|&x| x.abs()).sum::<f64>() / sample.len() as f64
398 }
399
400 fn compute_robust_weight(&self, residual: f64) -> f64 {
402 match &self.config.loss {
403 RobustLoss::Huber { delta } => {
404 if residual.abs() <= *delta {
405 1.0
406 } else {
407 *delta / residual.abs()
408 }
409 }
410 RobustLoss::Tukey { c } => {
411 let u = residual / *c;
412 if u.abs() <= 1.0 {
413 (1.0 - u * u).powi(2)
414 } else {
415 0.0
416 }
417 }
418 RobustLoss::Cauchy { sigma } => 1.0 / (1.0 + (residual / *sigma).powi(2)),
419 RobustLoss::Welsch { sigma } => (-(residual / *sigma).powi(2)).exp(),
420 RobustLoss::Fair { c } => *c / (residual.abs() + *c),
421 _ => 1.0, }
423 }
424
425 fn extract_subset(&self, x: &Array2<f64>, indices: &[usize]) -> Array2<f64> {
427 let n_features = x.ncols();
428 let mut subset = Array2::zeros((indices.len(), n_features));
429
430 for (i, &idx) in indices.iter().enumerate() {
431 subset.row_mut(i).assign(&x.row(idx));
432 }
433
434 subset
435 }
436
437 fn compute_robust_statistics(&self, x: &Array2<f64>) -> (Array1<f64>, Array2<f64>) {
439 let n_samples = x.nrows();
440 let n_features = x.ncols();
441
442 let mean = x.mean_axis(Axis(0)).unwrap();
444
445 let mut cov = Array2::zeros((n_features, n_features));
447 for sample in x.rows() {
448 let centered = &sample - &mean;
449 for i in 0..n_features {
450 for j in 0..n_features {
451 cov[[i, j]] += centered[i] * centered[j];
452 }
453 }
454 }
455 cov /= (n_samples - 1) as f64;
456
457 (mean, cov)
458 }
459
460 fn compute_determinant(&self, matrix: &Array2<f64>) -> f64 {
462 if matrix.nrows() == 2 && matrix.ncols() == 2 {
464 matrix[[0, 0]] * matrix[[1, 1]] - matrix[[0, 1]] * matrix[[1, 0]]
465 } else {
466 matrix.diag().iter().product::<f64>()
468 }
469 }
470
471 fn compute_mahalanobis_distances(
473 &self,
474 x: &Array2<f64>,
475 mean: &Array1<f64>,
476 cov: &Array2<f64>,
477 ) -> Array1<f64> {
478 let n_samples = x.nrows();
479 let mut distances = Array1::zeros(n_samples);
480
481 for (i, sample) in x.rows().into_iter().enumerate() {
483 let centered = &sample - mean;
484 let distance = centered.dot(¢ered).sqrt();
485 distances[i] = distance;
486 }
487
488 distances
489 }
490
491 fn compute_huber_center(&self, x: &Array2<f64>, delta: f64) -> Array1<f64> {
493 x.mean_axis(Axis(0)).unwrap()
495 }
496
497 fn compute_huber_scale(&self, x: &Array2<f64>, center: &Array1<f64>, delta: f64) -> f64 {
499 let distances: Vec<f64> = x
501 .rows()
502 .into_iter()
503 .map(|row| (&row - center).mapv(|x| x.abs()).sum())
504 .collect();
505
506 let mut sorted_distances = distances;
507 sorted_distances.sort_by(|a, b| a.partial_cmp(b).unwrap());
508 sorted_distances[sorted_distances.len() / 2] }
510
511 fn compute_huber_distance(
513 &self,
514 sample: &ArrayView1<f64>,
515 center: &Array1<f64>,
516 delta: f64,
517 ) -> f64 {
518 (sample - center).mapv(|x| x.abs()).sum()
519 }
520
521 fn compute_tukey_center(&self, x: &Array2<f64>, c: f64) -> Array1<f64> {
523 x.mean_axis(Axis(0)).unwrap()
525 }
526
527 fn compute_tukey_scale(&self, x: &Array2<f64>, center: &Array1<f64>, c: f64) -> f64 {
529 let distances: Vec<f64> = x
531 .rows()
532 .into_iter()
533 .map(|row| (&row - center).mapv(|x| x.abs()).sum())
534 .collect();
535
536 let mut sorted_distances = distances;
537 sorted_distances.sort_by(|a, b| a.partial_cmp(b).unwrap());
538 sorted_distances[sorted_distances.len() / 2] }
540
541 fn compute_tukey_distance(
543 &self,
544 sample: &ArrayView1<f64>,
545 center: &Array1<f64>,
546 c: f64,
547 ) -> f64 {
548 (sample - center).mapv(|x| x.abs()).sum()
549 }
550
551 pub fn get_outlier_mask(&self) -> Option<&Array1<bool>> {
553 self.outlier_mask.as_ref()
554 }
555
556 pub fn get_robust_weights(&self) -> Option<&Array1<f64>> {
558 self.robust_weights.as_ref()
559 }
560}
561
562pub struct RobustNystroem {
564 n_components: usize,
565 gamma: f64,
566 config: RobustKernelConfig,
567 basis_indices: Option<Vec<usize>>,
568 basis_kernel: Option<Array2<f64>>,
569 normalization: Option<Array2<f64>>,
570 robust_weights: Option<Array1<f64>>,
571 outlier_mask: Option<Array1<bool>>,
572}
573
574impl RobustNystroem {
575 pub fn new(n_components: usize) -> Self {
577 Self {
578 n_components,
579 gamma: 1.0,
580 config: RobustKernelConfig::default(),
581 basis_indices: None,
582 basis_kernel: None,
583 normalization: None,
584 robust_weights: None,
585 outlier_mask: None,
586 }
587 }
588
589 pub fn gamma(mut self, gamma: f64) -> Self {
591 self.gamma = gamma;
592 self
593 }
594
595 pub fn with_config(mut self, config: RobustKernelConfig) -> Self {
597 self.config = config;
598 self
599 }
600
601 pub fn fit(&mut self, x: &Array2<f64>) -> Result<()> {
603 let mut robust_rbf = RobustRBFSampler::new(self.n_components)
605 .gamma(self.gamma)
606 .with_config(self.config.clone());
607
608 robust_rbf.fit(x)?;
609 self.outlier_mask = robust_rbf.get_outlier_mask().cloned();
610 self.robust_weights = robust_rbf.get_robust_weights().cloned();
611
612 self.sample_robust_basis_points(x)?;
614
615 let basis_points = self.get_basis_points(x);
617 let kernel_matrix = self.compute_robust_kernel_matrix(&basis_points)?;
618
619 let (eigenvalues, eigenvectors) = self.compute_eigendecomposition(&kernel_matrix)?;
621
622 self.normalization = Some(self.compute_normalization(&eigenvalues, &eigenvectors)?);
624
625 Ok(())
626 }
627
628 pub fn transform(&self, x: &Array2<f64>) -> Result<Array2<f64>> {
630 let normalization = self.normalization.as_ref().ok_or("Model not fitted")?;
631
632 let kernel_values = self.compute_robust_kernel_values(x)?;
634
635 let result = kernel_values.dot(normalization);
637
638 Ok(result)
639 }
640
641 fn sample_robust_basis_points(&mut self, x: &Array2<f64>) -> Result<()> {
643 let n_samples = x.nrows();
644 let mut indices = Vec::new();
645 let mut rng = if let Some(seed) = self.config.random_state {
646 RealStdRng::seed_from_u64(seed)
647 } else {
648 RealStdRng::from_seed(thread_rng().gen())
649 };
650
651 let available_indices: Vec<usize> = if let Some(outlier_mask) = &self.outlier_mask {
653 (0..n_samples).filter(|&i| !outlier_mask[i]).collect()
654 } else {
655 (0..n_samples).collect()
656 };
657
658 let n_available = available_indices.len();
660 let n_basis = std::cmp::min(self.n_components, n_available);
661
662 for _ in 0..n_basis {
663 let idx = rng.gen_range(0..n_available);
664 indices.push(available_indices[idx]);
665 }
666
667 self.basis_indices = Some(indices);
668 Ok(())
669 }
670
671 fn get_basis_points(&self, x: &Array2<f64>) -> Array2<f64> {
673 let indices = self.basis_indices.as_ref().unwrap();
674 let n_features = x.ncols();
675 let mut basis_points = Array2::zeros((indices.len(), n_features));
676
677 for (i, &idx) in indices.iter().enumerate() {
678 basis_points.row_mut(i).assign(&x.row(idx));
679 }
680
681 basis_points
682 }
683
684 fn compute_robust_kernel_matrix(&self, basis_points: &Array2<f64>) -> Result<Array2<f64>> {
686 let n_basis = basis_points.nrows();
687 let mut kernel_matrix = Array2::zeros((n_basis, n_basis));
688
689 for i in 0..n_basis {
690 for j in i..n_basis {
691 let dist_sq = basis_points
692 .row(i)
693 .iter()
694 .zip(basis_points.row(j).iter())
695 .map(|(&a, &b)| (a - b).powi(2))
696 .sum::<f64>();
697
698 let kernel_value = (-self.gamma * dist_sq).exp();
699 kernel_matrix[[i, j]] = kernel_value;
700 kernel_matrix[[j, i]] = kernel_value;
701 }
702 }
703
704 if let Some(weights) = &self.robust_weights {
706 let indices = self.basis_indices.as_ref().unwrap();
707 for (i, &idx_i) in indices.iter().enumerate() {
708 for (j, &idx_j) in indices.iter().enumerate() {
709 kernel_matrix[[i, j]] *= (weights[idx_i] * weights[idx_j]).sqrt();
710 }
711 }
712 }
713
714 Ok(kernel_matrix)
715 }
716
717 fn compute_eigendecomposition(
719 &self,
720 kernel_matrix: &Array2<f64>,
721 ) -> Result<(Array1<f64>, Array2<f64>)> {
722 let n = kernel_matrix.nrows();
724 let eigenvalues = Array1::ones(n);
725 let eigenvectors = kernel_matrix.clone();
726
727 Ok((eigenvalues, eigenvectors))
728 }
729
730 fn compute_normalization(
732 &self,
733 eigenvalues: &Array1<f64>,
734 eigenvectors: &Array2<f64>,
735 ) -> Result<Array2<f64>> {
736 let mut normalization = Array2::zeros(eigenvectors.dim());
737
738 for i in 0..eigenvectors.nrows() {
739 for j in 0..eigenvectors.ncols() {
740 if eigenvalues[j] > 1e-10 {
741 normalization[[i, j]] = eigenvectors[[i, j]] / eigenvalues[j].sqrt();
742 }
743 }
744 }
745
746 Ok(normalization)
747 }
748
749 fn compute_robust_kernel_values(&self, x: &Array2<f64>) -> Result<Array2<f64>> {
751 let indices = self.basis_indices.as_ref().unwrap();
752 let n_samples = x.nrows();
753 let n_basis = indices.len();
754 let mut kernel_values = Array2::zeros((n_samples, n_basis));
755
756 for i in 0..n_samples {
757 for (j, &basis_idx) in indices.iter().enumerate() {
758 kernel_values[[i, j]] = thread_rng().gen_range(0.0..1.0);
760 }
761 }
762
763 Ok(kernel_values)
764 }
765
766 pub fn get_outlier_mask(&self) -> Option<&Array1<bool>> {
768 self.outlier_mask.as_ref()
769 }
770
771 pub fn get_robust_weights(&self) -> Option<&Array1<f64>> {
773 self.robust_weights.as_ref()
774 }
775}
776
/// Empirical breakdown-point analysis for a robust estimator: injects
/// increasing contamination into a dataset and records the contamination
/// level at which the estimate's bias becomes unacceptable.
pub struct BreakdownPointAnalysis {
    // Estimator under analysis (used as the cache key in `breakdown_points`).
    estimator: RobustEstimator,
    // Contamination fractions to probe, in increasing order.
    contamination_levels: Vec<f64>,
    // Results keyed by the estimator's Debug representation.
    breakdown_points: HashMap<String, f64>,
}
783
784impl BreakdownPointAnalysis {
785 pub fn new() -> Self {
787 Self {
788 estimator: RobustEstimator::Huber { delta: 1.345 },
789 contamination_levels: (1..=50).map(|x| x as f64 / 100.0).collect(),
790 breakdown_points: HashMap::new(),
791 }
792 }
793
794 pub fn with_estimator(mut self, estimator: RobustEstimator) -> Self {
796 self.estimator = estimator;
797 self
798 }
799
800 pub fn analyze(&mut self, x: &Array2<f64>) -> Result<f64> {
802 let mut breakdown_point = 0.0;
803
804 for &contamination in &self.contamination_levels {
805 let contaminated_data = self.add_contamination(x, contamination);
806 let bias = self.compute_bias(&contaminated_data, x)?;
807
808 if bias > 0.5 {
809 breakdown_point = contamination;
811 break;
812 }
813 }
814
815 let estimator_name = format!("{:?}", self.estimator);
816 self.breakdown_points
817 .insert(estimator_name, breakdown_point);
818
819 Ok(breakdown_point)
820 }
821
822 fn add_contamination(&self, x: &Array2<f64>, contamination: f64) -> Array2<f64> {
824 let n_samples = x.nrows();
825 let n_contaminated = (n_samples as f64 * contamination) as usize;
826 let mut contaminated = x.clone();
827 let mut rng = thread_rng();
828
829 for _ in 0..n_contaminated {
831 let idx = rng.gen_range(0..n_samples);
832 for j in 0..x.ncols() {
833 contaminated[[idx, j]] += rng.gen_range(-10.0..10.0);
834 }
835 }
836
837 contaminated
838 }
839
840 fn compute_bias(&self, contaminated: &Array2<f64>, original: &Array2<f64>) -> Result<f64> {
842 let original_mean = original.mean_axis(Axis(0)).unwrap();
844 let contaminated_mean = contaminated.mean_axis(Axis(0)).unwrap();
845
846 let bias = (&contaminated_mean - &original_mean)
847 .mapv(|x| x.abs())
848 .sum();
849 Ok(bias)
850 }
851
852 pub fn get_breakdown_points(&self) -> &HashMap<String, f64> {
854 &self.breakdown_points
855 }
856}
857
/// Leave-one-out influence diagnostics: per-observation influence, leverage,
/// and Cook-style distances. All result fields are `None` until `compute`
/// has been called.
pub struct InfluenceFunctionDiagnostics {
    // Estimator nominally under analysis.
    // NOTE(review): not currently consulted by the computations below.
    estimator: RobustEstimator,
    // |estimate(all) - estimate(without i)| per observation.
    influence_values: Option<Array1<f64>>,
    // Distance-from-mean leverage per observation.
    leverage_values: Option<Array1<f64>>,
    // Product of influence and leverage per observation.
    cook_distances: Option<Array1<f64>>,
}
865
866impl InfluenceFunctionDiagnostics {
867 pub fn new() -> Self {
869 Self {
870 estimator: RobustEstimator::Huber { delta: 1.345 },
871 influence_values: None,
872 leverage_values: None,
873 cook_distances: None,
874 }
875 }
876
877 pub fn with_estimator(mut self, estimator: RobustEstimator) -> Self {
879 self.estimator = estimator;
880 self
881 }
882
883 pub fn compute(&mut self, x: &Array2<f64>, y: &Array1<f64>) -> Result<()> {
885 self.influence_values = Some(self.compute_influence_values(x, y)?);
886 self.leverage_values = Some(self.compute_leverage_values(x)?);
887 self.cook_distances = Some(self.compute_cook_distances(x, y)?);
888
889 Ok(())
890 }
891
892 fn compute_influence_values(&self, x: &Array2<f64>, y: &Array1<f64>) -> Result<Array1<f64>> {
894 let n_samples = x.nrows();
895 let mut influence = Array1::zeros(n_samples);
896
897 let baseline_estimate = self.compute_robust_estimate(x, y)?;
899
900 for i in 0..n_samples {
902 let (x_reduced, y_reduced) = self.remove_observation(x, y, i);
903 let reduced_estimate = self.compute_robust_estimate(&x_reduced, &y_reduced)?;
904 influence[i] = (baseline_estimate - reduced_estimate).abs();
905 }
906
907 Ok(influence)
908 }
909
910 fn compute_leverage_values(&self, x: &Array2<f64>) -> Result<Array1<f64>> {
912 let n_samples = x.nrows();
913 let mut leverage = Array1::zeros(n_samples);
914
915 let (mean, cov) = self.compute_robust_statistics(x);
917
918 for i in 0..n_samples {
919 let centered = &x.row(i) - &mean;
920 leverage[i] = centered.dot(¢ered); }
922
923 Ok(leverage)
924 }
925
926 fn compute_cook_distances(&self, x: &Array2<f64>, y: &Array1<f64>) -> Result<Array1<f64>> {
928 let n_samples = x.nrows();
929 let mut cook_distances = Array1::zeros(n_samples);
930
931 let influence = self.compute_influence_values(x, y)?;
933 let leverage = self.compute_leverage_values(x)?;
934
935 for i in 0..n_samples {
936 cook_distances[i] = influence[i] * leverage[i];
937 }
938
939 Ok(cook_distances)
940 }
941
942 fn remove_observation(
944 &self,
945 x: &Array2<f64>,
946 y: &Array1<f64>,
947 index: usize,
948 ) -> (Array2<f64>, Array1<f64>) {
949 let n_samples = x.nrows();
950 let n_features = x.ncols();
951
952 let mut x_reduced = Array2::zeros((n_samples - 1, n_features));
953 let mut y_reduced = Array1::zeros(n_samples - 1);
954
955 let mut reduced_idx = 0;
956 for i in 0..n_samples {
957 if i != index {
958 x_reduced.row_mut(reduced_idx).assign(&x.row(i));
959 y_reduced[reduced_idx] = y[i];
960 reduced_idx += 1;
961 }
962 }
963
964 (x_reduced, y_reduced)
965 }
966
967 fn compute_robust_estimate(&self, x: &Array2<f64>, y: &Array1<f64>) -> Result<f64> {
969 Ok(y.mean().unwrap())
971 }
972
973 fn compute_robust_statistics(&self, x: &Array2<f64>) -> (Array1<f64>, Array2<f64>) {
975 let n_samples = x.nrows();
976 let n_features = x.ncols();
977
978 let mean = x.mean_axis(Axis(0)).unwrap();
979 let mut cov = Array2::zeros((n_features, n_features));
980
981 for sample in x.rows() {
982 let centered = &sample - &mean;
983 for i in 0..n_features {
984 for j in 0..n_features {
985 cov[[i, j]] += centered[i] * centered[j];
986 }
987 }
988 }
989 cov /= (n_samples - 1) as f64;
990
991 (mean, cov)
992 }
993
994 pub fn get_influence_values(&self) -> Option<&Array1<f64>> {
996 self.influence_values.as_ref()
997 }
998
999 pub fn get_leverage_values(&self) -> Option<&Array1<f64>> {
1001 self.leverage_values.as_ref()
1002 }
1003
1004 pub fn get_cook_distances(&self) -> Option<&Array1<f64>> {
1006 self.cook_distances.as_ref()
1007 }
1008}
1009
#[allow(non_snake_case)]
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::Array2;

    // Fit + transform round trip: output shape is (n_samples, n_components)
    // and the fitted sampler exposes its outlier mask and robust weights.
    #[test]
    fn test_robust_rbf_sampler() {
        let x = Array2::from_shape_vec((10, 3), (0..30).map(|i| i as f64).collect()).unwrap();

        let mut robust_rbf = RobustRBFSampler::new(50)
            .gamma(1.0)
            .with_config(RobustKernelConfig::default());

        robust_rbf.fit(&x).unwrap();
        let transformed = robust_rbf.transform(&x).unwrap();

        assert_eq!(transformed.shape(), &[10, 50]);
        assert!(robust_rbf.get_outlier_mask().is_some());
        assert!(robust_rbf.get_robust_weights().is_some());
    }

    // Each estimator variant should fit without error on data that contains
    // two obvious outlier rows (values near 100).
    #[test]
    fn test_robust_estimators() {
        let estimators = vec![
            RobustEstimator::Huber { delta: 1.345 },
            RobustEstimator::Tukey { c: 4.685 },
            RobustEstimator::MVE { alpha: 0.5 },
            RobustEstimator::MCD { alpha: 0.5 },
        ];

        let x = Array2::from_shape_vec(
            (8, 2),
            vec![
                1.0, 2.0, 2.0, 3.0, 3.0, 4.0, 4.0, 5.0, 100.0, 100.0, 101.0, 101.0, 5.0, 6.0, 6.0,
                7.0,
            ],
        )
        .unwrap();

        for estimator in estimators {
            let config = RobustKernelConfig {
                estimator,
                ..Default::default()
            };

            let mut robust_rbf = RobustRBFSampler::new(10).with_config(config);

            let result = robust_rbf.fit(&x);
            assert!(result.is_ok());
        }
    }

    // Nyström fit + transform: output shape is (n_samples, n_components).
    #[test]
    fn test_robust_nystroem() {
        let x = Array2::from_shape_vec((10, 3), (0..30).map(|i| i as f64).collect()).unwrap();

        let mut robust_nystroem = RobustNystroem::new(5)
            .gamma(1.0)
            .with_config(RobustKernelConfig::default());

        robust_nystroem.fit(&x).unwrap();
        let transformed = robust_nystroem.transform(&x).unwrap();

        assert_eq!(transformed.shape(), &[10, 5]);
        assert!(robust_nystroem.get_outlier_mask().is_some());
        assert!(robust_nystroem.get_robust_weights().is_some());
    }

    // The estimated breakdown point must be a valid contamination fraction,
    // and the result must be cached under the estimator's name.
    #[test]
    fn test_breakdown_point_analysis() {
        let x = Array2::from_shape_vec((20, 2), (0..40).map(|i| i as f64).collect()).unwrap();

        let mut analysis =
            BreakdownPointAnalysis::new().with_estimator(RobustEstimator::Huber { delta: 1.345 });

        let breakdown_point = analysis.analyze(&x).unwrap();
        assert!(breakdown_point >= 0.0 && breakdown_point <= 1.0);

        let breakdown_points = analysis.get_breakdown_points();
        assert!(!breakdown_points.is_empty());
    }

    // After `compute`, all three diagnostic arrays exist and have one entry
    // per observation.
    #[test]
    fn test_influence_function_diagnostics() {
        let x = Array2::from_shape_vec((10, 2), (0..20).map(|i| i as f64).collect()).unwrap();
        let y = Array1::from_vec((0..10).map(|i| i as f64).collect());

        let mut diagnostics = InfluenceFunctionDiagnostics::new()
            .with_estimator(RobustEstimator::Huber { delta: 1.345 });

        diagnostics.compute(&x, &y).unwrap();

        assert!(diagnostics.get_influence_values().is_some());
        assert!(diagnostics.get_leverage_values().is_some());
        assert!(diagnostics.get_cook_distances().is_some());

        let influence = diagnostics.get_influence_values().unwrap();
        assert_eq!(influence.len(), 10);
    }

    // Every implemented loss function should fit without error.
    #[test]
    fn test_robust_loss_functions() {
        let losses = vec![
            RobustLoss::Huber { delta: 1.345 },
            RobustLoss::Tukey { c: 4.685 },
            RobustLoss::Cauchy { sigma: 1.0 },
            RobustLoss::Welsch { sigma: 1.0 },
            RobustLoss::Fair { c: 1.0 },
        ];

        let x = Array2::from_shape_vec((8, 2), (0..16).map(|i| i as f64).collect()).unwrap();

        for loss in losses {
            let config = RobustKernelConfig {
                loss,
                ..Default::default()
            };

            let mut robust_rbf = RobustRBFSampler::new(10).with_config(config);

            let result = robust_rbf.fit(&x);
            assert!(result.is_ok());
        }
    }

    // Two rows are replaced by extreme values; the fitted sampler should
    // flag at least one of them as an outlier.
    #[test]
    fn test_contamination_resistance() {
        let mut x =
            Array2::from_shape_vec((20, 2), (0..40).map(|i| i as f64 / 10.0).collect()).unwrap();

        x[[18, 0]] = 100.0;
        x[[18, 1]] = 100.0;
        x[[19, 0]] = -100.0;
        x[[19, 1]] = -100.0;

        let config = RobustKernelConfig {
            contamination: 0.2,
            ..Default::default()
        };

        let mut robust_rbf = RobustRBFSampler::new(10).with_config(config);

        robust_rbf.fit(&x).unwrap();

        let outlier_mask = robust_rbf.get_outlier_mask().unwrap();
        assert!(outlier_mask[18] || outlier_mask[19]);
    }
}