1use scirs2_core::ndarray::{Array1, Array2, ArrayView1, Axis};
8use scirs2_core::random::essentials::Normal as RandNormal;
9use scirs2_core::random::rngs::StdRng as RealStdRng;
10use scirs2_core::random::seq::SliceRandom;
11use scirs2_core::random::Rng;
12use scirs2_core::random::{thread_rng, SeedableRng};
13use sklears_core::error::Result;
14use std::collections::HashMap;
15
/// Families of robust estimators used for outlier detection during fitting.
///
/// Only `Huber`, `Tukey`, `MVE`, and `MCD` have dedicated detection routines;
/// the remaining variants fall back to a distance-based check (see
/// `RobustRBFSampler::detect_outliers`).
#[derive(Clone, Debug, PartialEq)]
pub enum RobustEstimator {
    /// Huber M-estimator; `delta` is the quadratic-to-linear transition point.
    Huber { delta: f64 },
    /// Tukey biweight; `c` is the tuning/rejection constant.
    Tukey { c: f64 },
    /// Hampel three-part redescending estimator with knots `a <= b <= c`.
    Hampel { a: f64, b: f64, c: f64 },
    /// Minimum Volume Ellipsoid; `alpha` is the fraction of samples to cover.
    MVE { alpha: f64 },
    /// Minimum Covariance Determinant; `alpha` is the subset fraction.
    MCD { alpha: f64 },
    /// S-estimator with the requested breakdown point.
    SEstimator { breakdown_point: f64 },
    /// MM-estimator with the requested asymptotic efficiency.
    MMEstimator { efficiency: f64 },
    /// Least-absolute-deviations (L1) estimator.
    L1,
    /// Quantile estimator at level `tau` (expected in (0, 1)).
    Quantile { tau: f64 },
}
39
/// Robust loss functions whose weight functions drive IRLS reweighting.
///
/// Only `Huber`, `Tukey`, `Cauchy`, `Welsch`, and `Fair` currently have weight
/// implementations in `compute_robust_weight`; the other variants fall back to
/// a constant weight of 1.0 there.
#[derive(Clone, Debug, PartialEq)]
pub enum RobustLoss {
    /// Huber loss; quadratic below `delta`, linear above.
    Huber { delta: f64 },
    /// Epsilon-insensitive (SVR-style) loss; zero inside the `epsilon` tube.
    EpsilonInsensitive { epsilon: f64 },
    /// Tukey biweight loss with tuning constant `c`.
    Tukey { c: f64 },
    /// Cauchy (Lorentzian) loss with scale `sigma`.
    Cauchy { sigma: f64 },
    /// Welsch loss (Gaussian decay) with scale `sigma`.
    Welsch { sigma: f64 },
    /// Fair loss with tuning constant `c`.
    Fair { c: f64 },
    /// Logistic loss with scale `alpha`.
    Logistic { alpha: f64 },
    /// Pinball/quantile loss at level `tau`.
    Quantile { tau: f64 },
}
61
/// Configuration shared by the robust kernel approximators in this module.
#[derive(Clone, Debug)]
pub struct RobustKernelConfig {
    /// Estimator used for outlier detection.
    pub estimator: RobustEstimator,
    /// Loss whose weight function is used for IRLS reweighting.
    pub loss: RobustLoss,
    /// Maximum number of IRLS iterations.
    pub max_iterations: usize,
    /// Convergence threshold on the total absolute change in sample weights
    /// between consecutive IRLS iterations.
    pub tolerance: f64,
    /// Assumed fraction of contaminated samples.
    /// NOTE(review): no code in this file reads this field — confirm whether
    /// callers elsewhere use it or whether it is dead configuration.
    pub contamination: f64,
    /// Whether to run iteratively reweighted least squares during `fit`.
    pub use_irls: bool,
    /// Optional RNG seed for reproducible projections and basis sampling;
    /// `None` seeds from the thread-local RNG.
    pub random_state: Option<u64>,
}
81
impl Default for RobustKernelConfig {
    /// Huber estimator and loss with `delta = 1.345` (the standard constant
    /// giving ~95% efficiency under Gaussian errors), 100 IRLS iterations,
    /// tolerance `1e-6`, 10% assumed contamination, IRLS enabled, unseeded RNG.
    fn default() -> Self {
        Self {
            estimator: RobustEstimator::Huber { delta: 1.345 },
            loss: RobustLoss::Huber { delta: 1.345 },
            max_iterations: 100,
            tolerance: 1e-6,
            contamination: 0.1,
            use_irls: true,
            random_state: None,
        }
    }
}
95
/// Random-Fourier-feature approximation of the RBF kernel with robust
/// outlier handling: outlying training rows are detected and down-weighted.
pub struct RobustRBFSampler {
    /// Number of random Fourier features produced by `transform`.
    n_components: usize,
    /// RBF kernel width parameter (`k(a, b) = exp(-gamma * ||a - b||^2)`).
    gamma: f64,
    /// Robust estimation settings (estimator, loss, IRLS options, seed).
    config: RobustKernelConfig,
    /// Random projection matrix, shape `(n_features, n_components)`; set by `fit`.
    random_weights: Option<Array2<f64>>,
    /// Phase offsets in `[0, 2*pi)`, one per component; set by `fit`.
    random_offset: Option<Array1<f64>>,
    /// Per-training-sample robust weights (0.0 for detected outliers); set by `fit`.
    robust_weights: Option<Array1<f64>>,
    /// Per-training-sample outlier flags (`true` = outlier); set by `fit`.
    outlier_mask: Option<Array1<bool>>,
}
106
107impl RobustRBFSampler {
108 pub fn new(n_components: usize) -> Self {
110 Self {
111 n_components,
112 gamma: 1.0,
113 config: RobustKernelConfig::default(),
114 random_weights: None,
115 random_offset: None,
116 robust_weights: None,
117 outlier_mask: None,
118 }
119 }
120
121 pub fn gamma(mut self, gamma: f64) -> Self {
123 self.gamma = gamma;
124 self
125 }
126
127 pub fn with_config(mut self, config: RobustKernelConfig) -> Self {
129 self.config = config;
130 self
131 }
132
133 pub fn fit(&mut self, x: &Array2<f64>) -> Result<()> {
135 let n_features = x.ncols();
136
137 let normal = RandNormal::new(0.0, (2.0 * self.gamma).sqrt()).unwrap();
139 let mut rng = if let Some(seed) = self.config.random_state {
140 RealStdRng::seed_from_u64(seed)
141 } else {
142 RealStdRng::from_seed(thread_rng().gen())
143 };
144
145 self.random_weights = Some(Array2::from_shape_fn(
146 (n_features, self.n_components),
147 |_| rng.sample(normal),
148 ));
149
150 self.random_offset = Some(Array1::from_shape_fn(self.n_components, |_| {
151 rng.gen_range(0.0..2.0 * std::f64::consts::PI)
152 }));
153
154 self.detect_outliers(x)?;
156 self.compute_robust_weights(x)?;
157
158 Ok(())
159 }
160
161 pub fn transform(&self, x: &Array2<f64>) -> Result<Array2<f64>> {
163 let weights = self.random_weights.as_ref().ok_or("Model not fitted")?;
164 let offset = self.random_offset.as_ref().ok_or("Model not fitted")?;
165
166 let projection = x.dot(weights) + offset;
168
169 let mut transformed =
171 projection.mapv(|x| (x * (2.0 / self.n_components as f64).sqrt()).cos());
172
173 if let Some(robust_weights) = &self.robust_weights {
175 for (mut row, &weight) in transformed
176 .rows_mut()
177 .into_iter()
178 .zip(robust_weights.iter())
179 {
180 row *= weight;
181 }
182 }
183
184 Ok(transformed)
185 }
186
187 fn detect_outliers(&mut self, x: &Array2<f64>) -> Result<()> {
189 match &self.config.estimator {
190 RobustEstimator::MVE { alpha } => {
191 self.outlier_mask = Some(self.detect_outliers_mve(x, *alpha)?);
192 }
193 RobustEstimator::MCD { alpha } => {
194 self.outlier_mask = Some(self.detect_outliers_mcd(x, *alpha)?);
195 }
196 RobustEstimator::Huber { delta } => {
197 self.outlier_mask = Some(self.detect_outliers_huber(x, *delta)?);
198 }
199 RobustEstimator::Tukey { c } => {
200 self.outlier_mask = Some(self.detect_outliers_tukey(x, *c)?);
201 }
202 _ => {
203 self.outlier_mask = Some(self.detect_outliers_mahalanobis(x)?);
205 }
206 }
207
208 Ok(())
209 }
210
211 fn detect_outliers_mve(&self, x: &Array2<f64>, alpha: f64) -> Result<Array1<bool>> {
213 let n_samples = x.nrows();
214 let n_features = x.ncols();
215 let h = ((n_samples as f64 * alpha).floor() as usize).max(n_features + 1);
216
217 let mut best_volume = f64::INFINITY;
218 let mut best_subset = Vec::new();
219 let mut rng = thread_rng();
220
221 for _ in 0..100 {
223 let mut subset: Vec<usize> = (0..n_samples).collect();
224 subset.shuffle(&mut rng);
225 subset.truncate(h);
226
227 let subset_data = self.extract_subset(x, &subset);
229 let (_mean, cov) = self.compute_robust_statistics(&subset_data);
230
231 let volume = self.compute_determinant(&cov);
233
234 if volume < best_volume {
235 best_volume = volume;
236 best_subset = subset;
237 }
238 }
239
240 let mut outlier_mask = Array1::from_elem(n_samples, true);
242 for &idx in &best_subset {
243 outlier_mask[idx] = false;
244 }
245
246 Ok(outlier_mask)
247 }
248
249 fn detect_outliers_mcd(&self, x: &Array2<f64>, alpha: f64) -> Result<Array1<bool>> {
251 let n_samples = x.nrows();
252 let n_features = x.ncols();
253 let h = ((n_samples as f64 * alpha).floor() as usize).max(n_features + 1);
254
255 let mut best_determinant = f64::INFINITY;
256 let mut best_subset = Vec::new();
257 let mut rng = thread_rng();
258
259 for _ in 0..100 {
261 let mut subset: Vec<usize> = (0..n_samples).collect();
262 subset.shuffle(&mut rng);
263 subset.truncate(h);
264
265 for _ in 0..10 {
267 let subset_data = self.extract_subset(x, &subset);
268 let (mean, cov) = self.compute_robust_statistics(&subset_data);
269 let determinant = self.compute_determinant(&cov);
270
271 if determinant < best_determinant {
272 best_determinant = determinant;
273 best_subset = subset.clone();
274 }
275
276 let distances = self.compute_mahalanobis_distances(x, &mean, &cov);
278 let mut indexed_distances: Vec<(usize, f64)> =
279 distances.iter().enumerate().map(|(i, &d)| (i, d)).collect();
280 indexed_distances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap());
281
282 subset = indexed_distances.iter().take(h).map(|(i, _)| *i).collect();
283 }
284 }
285
286 let mut outlier_mask = Array1::from_elem(n_samples, true);
288 for &idx in &best_subset {
289 outlier_mask[idx] = false;
290 }
291
292 Ok(outlier_mask)
293 }
294
295 fn detect_outliers_huber(&self, x: &Array2<f64>, delta: f64) -> Result<Array1<bool>> {
297 let n_samples = x.nrows();
298 let mut outlier_mask = Array1::from_elem(n_samples, false);
299
300 let center = self.compute_huber_center(x, delta);
302 let scale = self.compute_huber_scale(x, ¢er, delta);
303
304 for i in 0..n_samples {
306 let distance = self.compute_huber_distance(&x.row(i), ¢er, delta);
307 if distance > 3.0 * scale {
308 outlier_mask[i] = true;
309 }
310 }
311
312 Ok(outlier_mask)
313 }
314
315 fn detect_outliers_tukey(&self, x: &Array2<f64>, c: f64) -> Result<Array1<bool>> {
317 let n_samples = x.nrows();
318 let mut outlier_mask = Array1::from_elem(n_samples, false);
319
320 let center = self.compute_tukey_center(x, c);
322 let scale = self.compute_tukey_scale(x, ¢er, c);
323
324 for i in 0..n_samples {
326 let distance = self.compute_tukey_distance(&x.row(i), ¢er, c);
327 if distance > 3.0 * scale {
328 outlier_mask[i] = true;
329 }
330 }
331
332 Ok(outlier_mask)
333 }
334
335 fn detect_outliers_mahalanobis(&self, x: &Array2<f64>) -> Result<Array1<bool>> {
337 let n_samples = x.nrows();
338 let mut outlier_mask = Array1::from_elem(n_samples, false);
339
340 let (mean, cov) = self.compute_robust_statistics(x);
342 let distances = self.compute_mahalanobis_distances(x, &mean, &cov);
343
344 let threshold = 3.0; for (i, &distance) in distances.iter().enumerate() {
348 if distance > threshold {
349 outlier_mask[i] = true;
350 }
351 }
352
353 Ok(outlier_mask)
354 }
355
356 fn compute_robust_weights(&mut self, x: &Array2<f64>) -> Result<()> {
358 let n_samples = x.nrows();
359 let mut weights = Array1::ones(n_samples);
360
361 if self.config.use_irls {
362 for _iteration in 0..self.config.max_iterations {
363 let old_weights = weights.clone();
364
365 for i in 0..n_samples {
367 let residual = self.compute_residual(&x.row(i), i);
368 weights[i] = self.compute_robust_weight(residual);
369 }
370
371 let weight_change = (&weights - &old_weights).mapv(|x| x.abs()).sum();
373 if weight_change < self.config.tolerance {
374 break;
375 }
376 }
377 }
378
379 if let Some(outlier_mask) = &self.outlier_mask {
381 for (i, &is_outlier) in outlier_mask.iter().enumerate() {
382 if is_outlier {
383 weights[i] = 0.0;
384 }
385 }
386 }
387
388 self.robust_weights = Some(weights);
389 Ok(())
390 }
391
392 fn compute_residual(&self, sample: &ArrayView1<f64>, _index: usize) -> f64 {
394 sample.iter().map(|&x| x.abs()).sum::<f64>() / sample.len() as f64
396 }
397
398 fn compute_robust_weight(&self, residual: f64) -> f64 {
400 match &self.config.loss {
401 RobustLoss::Huber { delta } => {
402 if residual.abs() <= *delta {
403 1.0
404 } else {
405 *delta / residual.abs()
406 }
407 }
408 RobustLoss::Tukey { c } => {
409 let u = residual / *c;
410 if u.abs() <= 1.0 {
411 (1.0 - u * u).powi(2)
412 } else {
413 0.0
414 }
415 }
416 RobustLoss::Cauchy { sigma } => 1.0 / (1.0 + (residual / *sigma).powi(2)),
417 RobustLoss::Welsch { sigma } => (-(residual / *sigma).powi(2)).exp(),
418 RobustLoss::Fair { c } => *c / (residual.abs() + *c),
419 _ => 1.0, }
421 }
422
423 fn extract_subset(&self, x: &Array2<f64>, indices: &[usize]) -> Array2<f64> {
425 let n_features = x.ncols();
426 let mut subset = Array2::zeros((indices.len(), n_features));
427
428 for (i, &idx) in indices.iter().enumerate() {
429 subset.row_mut(i).assign(&x.row(idx));
430 }
431
432 subset
433 }
434
435 fn compute_robust_statistics(&self, x: &Array2<f64>) -> (Array1<f64>, Array2<f64>) {
437 let n_samples = x.nrows();
438 let n_features = x.ncols();
439
440 let mean = x.mean_axis(Axis(0)).unwrap();
442
443 let mut cov = Array2::zeros((n_features, n_features));
445 for sample in x.rows() {
446 let centered = &sample - &mean;
447 for i in 0..n_features {
448 for j in 0..n_features {
449 cov[[i, j]] += centered[i] * centered[j];
450 }
451 }
452 }
453 cov /= (n_samples - 1) as f64;
454
455 (mean, cov)
456 }
457
458 fn compute_determinant(&self, matrix: &Array2<f64>) -> f64 {
460 if matrix.nrows() == 2 && matrix.ncols() == 2 {
462 matrix[[0, 0]] * matrix[[1, 1]] - matrix[[0, 1]] * matrix[[1, 0]]
463 } else {
464 matrix.diag().iter().product::<f64>()
466 }
467 }
468
469 fn compute_mahalanobis_distances(
471 &self,
472 x: &Array2<f64>,
473 mean: &Array1<f64>,
474 _cov: &Array2<f64>,
475 ) -> Array1<f64> {
476 let n_samples = x.nrows();
477 let mut distances = Array1::zeros(n_samples);
478
479 for (i, sample) in x.rows().into_iter().enumerate() {
481 let centered = &sample - mean;
482 let distance = centered.dot(¢ered).sqrt();
483 distances[i] = distance;
484 }
485
486 distances
487 }
488
489 fn compute_huber_center(&self, x: &Array2<f64>, _delta: f64) -> Array1<f64> {
491 x.mean_axis(Axis(0)).unwrap()
493 }
494
495 fn compute_huber_scale(&self, x: &Array2<f64>, center: &Array1<f64>, _delta: f64) -> f64 {
497 let distances: Vec<f64> = x
499 .rows()
500 .into_iter()
501 .map(|row| (&row - center).mapv(|x| x.abs()).sum())
502 .collect();
503
504 let mut sorted_distances = distances;
505 sorted_distances.sort_by(|a, b| a.partial_cmp(b).unwrap());
506 sorted_distances[sorted_distances.len() / 2] }
508
509 fn compute_huber_distance(
511 &self,
512 sample: &ArrayView1<f64>,
513 center: &Array1<f64>,
514 _delta: f64,
515 ) -> f64 {
516 (sample - center).mapv(|x| x.abs()).sum()
517 }
518
519 fn compute_tukey_center(&self, x: &Array2<f64>, _c: f64) -> Array1<f64> {
521 x.mean_axis(Axis(0)).unwrap()
523 }
524
525 fn compute_tukey_scale(&self, x: &Array2<f64>, center: &Array1<f64>, _c: f64) -> f64 {
527 let distances: Vec<f64> = x
529 .rows()
530 .into_iter()
531 .map(|row| (&row - center).mapv(|x| x.abs()).sum())
532 .collect();
533
534 let mut sorted_distances = distances;
535 sorted_distances.sort_by(|a, b| a.partial_cmp(b).unwrap());
536 sorted_distances[sorted_distances.len() / 2] }
538
539 fn compute_tukey_distance(
541 &self,
542 sample: &ArrayView1<f64>,
543 center: &Array1<f64>,
544 _c: f64,
545 ) -> f64 {
546 (sample - center).mapv(|x| x.abs()).sum()
547 }
548
549 pub fn get_outlier_mask(&self) -> Option<&Array1<bool>> {
551 self.outlier_mask.as_ref()
552 }
553
554 pub fn get_robust_weights(&self) -> Option<&Array1<f64>> {
556 self.robust_weights.as_ref()
557 }
558}
559
/// Nystroem kernel approximation that samples its basis points from the
/// inliers identified by `RobustRBFSampler`.
pub struct RobustNystroem {
    /// Target number of basis points / output features.
    n_components: usize,
    /// RBF kernel width parameter.
    gamma: f64,
    /// Robust estimation settings (estimator, loss, seed, ...).
    config: RobustKernelConfig,
    /// Row indices of the sampled basis points; set by `fit`.
    basis_indices: Option<Vec<usize>>,
    /// Cached matrix associated with the sampled basis; populated by `fit`.
    basis_kernel: Option<Array2<f64>>,
    /// Normalization matrix applied in `transform`; set by `fit`.
    normalization: Option<Array2<f64>>,
    /// Per-training-sample robust weights copied from the RBF sampler.
    robust_weights: Option<Array1<f64>>,
    /// Per-training-sample outlier flags copied from the RBF sampler.
    outlier_mask: Option<Array1<bool>>,
}
571
572impl RobustNystroem {
573 pub fn new(n_components: usize) -> Self {
575 Self {
576 n_components,
577 gamma: 1.0,
578 config: RobustKernelConfig::default(),
579 basis_indices: None,
580 basis_kernel: None,
581 normalization: None,
582 robust_weights: None,
583 outlier_mask: None,
584 }
585 }
586
587 pub fn gamma(mut self, gamma: f64) -> Self {
589 self.gamma = gamma;
590 self
591 }
592
593 pub fn with_config(mut self, config: RobustKernelConfig) -> Self {
595 self.config = config;
596 self
597 }
598
599 pub fn fit(&mut self, x: &Array2<f64>) -> Result<()> {
601 let mut robust_rbf = RobustRBFSampler::new(self.n_components)
603 .gamma(self.gamma)
604 .with_config(self.config.clone());
605
606 robust_rbf.fit(x)?;
607 self.outlier_mask = robust_rbf.get_outlier_mask().cloned();
608 self.robust_weights = robust_rbf.get_robust_weights().cloned();
609
610 self.sample_robust_basis_points(x)?;
612
613 let basis_points = self.get_basis_points(x);
615 let kernel_matrix = self.compute_robust_kernel_matrix(&basis_points)?;
616
617 let (eigenvalues, eigenvectors) = self.compute_eigendecomposition(&kernel_matrix)?;
619
620 self.normalization = Some(self.compute_normalization(&eigenvalues, &eigenvectors)?);
622
623 Ok(())
624 }
625
626 pub fn transform(&self, x: &Array2<f64>) -> Result<Array2<f64>> {
628 let normalization = self.normalization.as_ref().ok_or("Model not fitted")?;
629
630 let kernel_values = self.compute_robust_kernel_values(x)?;
632
633 let result = kernel_values.dot(normalization);
635
636 Ok(result)
637 }
638
639 fn sample_robust_basis_points(&mut self, x: &Array2<f64>) -> Result<()> {
641 let n_samples = x.nrows();
642 let mut indices = Vec::new();
643 let mut rng = if let Some(seed) = self.config.random_state {
644 RealStdRng::seed_from_u64(seed)
645 } else {
646 RealStdRng::from_seed(thread_rng().gen())
647 };
648
649 let available_indices: Vec<usize> = if let Some(outlier_mask) = &self.outlier_mask {
651 (0..n_samples).filter(|&i| !outlier_mask[i]).collect()
652 } else {
653 (0..n_samples).collect()
654 };
655
656 let n_available = available_indices.len();
658 let n_basis = std::cmp::min(self.n_components, n_available);
659
660 for _ in 0..n_basis {
661 let idx = rng.gen_range(0..n_available);
662 indices.push(available_indices[idx]);
663 }
664
665 self.basis_indices = Some(indices);
666 Ok(())
667 }
668
669 fn get_basis_points(&self, x: &Array2<f64>) -> Array2<f64> {
671 let indices = self.basis_indices.as_ref().unwrap();
672 let n_features = x.ncols();
673 let mut basis_points = Array2::zeros((indices.len(), n_features));
674
675 for (i, &idx) in indices.iter().enumerate() {
676 basis_points.row_mut(i).assign(&x.row(idx));
677 }
678
679 basis_points
680 }
681
682 fn compute_robust_kernel_matrix(&self, basis_points: &Array2<f64>) -> Result<Array2<f64>> {
684 let n_basis = basis_points.nrows();
685 let mut kernel_matrix = Array2::zeros((n_basis, n_basis));
686
687 for i in 0..n_basis {
688 for j in i..n_basis {
689 let dist_sq = basis_points
690 .row(i)
691 .iter()
692 .zip(basis_points.row(j).iter())
693 .map(|(&a, &b)| (a - b).powi(2))
694 .sum::<f64>();
695
696 let kernel_value = (-self.gamma * dist_sq).exp();
697 kernel_matrix[[i, j]] = kernel_value;
698 kernel_matrix[[j, i]] = kernel_value;
699 }
700 }
701
702 if let Some(weights) = &self.robust_weights {
704 let indices = self.basis_indices.as_ref().unwrap();
705 for (i, &idx_i) in indices.iter().enumerate() {
706 for (j, &idx_j) in indices.iter().enumerate() {
707 kernel_matrix[[i, j]] *= (weights[idx_i] * weights[idx_j]).sqrt();
708 }
709 }
710 }
711
712 Ok(kernel_matrix)
713 }
714
715 fn compute_eigendecomposition(
717 &self,
718 kernel_matrix: &Array2<f64>,
719 ) -> Result<(Array1<f64>, Array2<f64>)> {
720 let n = kernel_matrix.nrows();
722 let eigenvalues = Array1::ones(n);
723 let eigenvectors = kernel_matrix.clone();
724
725 Ok((eigenvalues, eigenvectors))
726 }
727
728 fn compute_normalization(
730 &self,
731 eigenvalues: &Array1<f64>,
732 eigenvectors: &Array2<f64>,
733 ) -> Result<Array2<f64>> {
734 let mut normalization = Array2::zeros(eigenvectors.dim());
735
736 for i in 0..eigenvectors.nrows() {
737 for j in 0..eigenvectors.ncols() {
738 if eigenvalues[j] > 1e-10 {
739 normalization[[i, j]] = eigenvectors[[i, j]] / eigenvalues[j].sqrt();
740 }
741 }
742 }
743
744 Ok(normalization)
745 }
746
747 fn compute_robust_kernel_values(&self, x: &Array2<f64>) -> Result<Array2<f64>> {
749 let indices = self.basis_indices.as_ref().unwrap();
750 let n_samples = x.nrows();
751 let n_basis = indices.len();
752 let mut kernel_values = Array2::zeros((n_samples, n_basis));
753
754 for i in 0..n_samples {
755 for (j, &_basis_idx) in indices.iter().enumerate() {
756 kernel_values[[i, j]] = thread_rng().gen_range(0.0..1.0);
758 }
759 }
760
761 Ok(kernel_values)
762 }
763
764 pub fn get_outlier_mask(&self) -> Option<&Array1<bool>> {
766 self.outlier_mask.as_ref()
767 }
768
769 pub fn get_robust_weights(&self) -> Option<&Array1<f64>> {
771 self.robust_weights.as_ref()
772 }
773}
774
/// Empirical breakdown-point analysis: injects increasing contamination into
/// a dataset and records the smallest contamination level at which the
/// estimate's bias exceeds a threshold.
pub struct BreakdownPointAnalysis {
    /// Estimator under analysis; its `Debug` form keys the results map.
    /// NOTE(review): the current bias computation uses the plain mean and
    /// does not consult this estimator — confirm intended behavior.
    estimator: RobustEstimator,
    /// Contamination fractions to probe, in increasing order.
    contamination_levels: Vec<f64>,
    /// Breakdown points found so far, keyed by estimator `Debug` string.
    breakdown_points: HashMap<String, f64>,
}
781
impl Default for BreakdownPointAnalysis {
    /// Equivalent to [`BreakdownPointAnalysis::new`].
    fn default() -> Self {
        Self::new()
    }
}
787
788impl BreakdownPointAnalysis {
789 pub fn new() -> Self {
791 Self {
792 estimator: RobustEstimator::Huber { delta: 1.345 },
793 contamination_levels: (1..=50).map(|x| x as f64 / 100.0).collect(),
794 breakdown_points: HashMap::new(),
795 }
796 }
797
798 pub fn with_estimator(mut self, estimator: RobustEstimator) -> Self {
800 self.estimator = estimator;
801 self
802 }
803
804 pub fn analyze(&mut self, x: &Array2<f64>) -> Result<f64> {
806 let mut breakdown_point = 0.0;
807
808 for &contamination in &self.contamination_levels {
809 let contaminated_data = self.add_contamination(x, contamination);
810 let bias = self.compute_bias(&contaminated_data, x)?;
811
812 if bias > 0.5 {
813 breakdown_point = contamination;
815 break;
816 }
817 }
818
819 let estimator_name = format!("{:?}", self.estimator);
820 self.breakdown_points
821 .insert(estimator_name, breakdown_point);
822
823 Ok(breakdown_point)
824 }
825
826 fn add_contamination(&self, x: &Array2<f64>, contamination: f64) -> Array2<f64> {
828 let n_samples = x.nrows();
829 let n_contaminated = (n_samples as f64 * contamination) as usize;
830 let mut contaminated = x.clone();
831 let mut rng = thread_rng();
832
833 for _ in 0..n_contaminated {
835 let idx = rng.gen_range(0..n_samples);
836 for j in 0..x.ncols() {
837 contaminated[[idx, j]] += rng.gen_range(-10.0..10.0);
838 }
839 }
840
841 contaminated
842 }
843
844 fn compute_bias(&self, contaminated: &Array2<f64>, original: &Array2<f64>) -> Result<f64> {
846 let original_mean = original.mean_axis(Axis(0)).unwrap();
848 let contaminated_mean = contaminated.mean_axis(Axis(0)).unwrap();
849
850 let bias = (&contaminated_mean - &original_mean)
851 .mapv(|x| x.abs())
852 .sum();
853 Ok(bias)
854 }
855
856 pub fn get_breakdown_points(&self) -> &HashMap<String, f64> {
858 &self.breakdown_points
859 }
860}
861
/// Leave-one-out influence diagnostics for a regression dataset `(x, y)`.
pub struct InfluenceFunctionDiagnostics {
    /// Estimator nominally under analysis.
    /// NOTE(review): the current estimate is a plain mean of `y` and does not
    /// consult this estimator — confirm intended behavior.
    estimator: RobustEstimator,
    /// |estimate(all) - estimate(without i)| per sample; set by `compute`.
    influence_values: Option<Array1<f64>>,
    /// Squared distance of each sample from the feature mean; set by `compute`.
    leverage_values: Option<Array1<f64>>,
    /// Product of influence and leverage per sample; set by `compute`.
    cook_distances: Option<Array1<f64>>,
}
869
impl Default for InfluenceFunctionDiagnostics {
    /// Equivalent to [`InfluenceFunctionDiagnostics::new`].
    fn default() -> Self {
        Self::new()
    }
}
875
876impl InfluenceFunctionDiagnostics {
877 pub fn new() -> Self {
879 Self {
880 estimator: RobustEstimator::Huber { delta: 1.345 },
881 influence_values: None,
882 leverage_values: None,
883 cook_distances: None,
884 }
885 }
886
887 pub fn with_estimator(mut self, estimator: RobustEstimator) -> Self {
889 self.estimator = estimator;
890 self
891 }
892
893 pub fn compute(&mut self, x: &Array2<f64>, y: &Array1<f64>) -> Result<()> {
895 self.influence_values = Some(self.compute_influence_values(x, y)?);
896 self.leverage_values = Some(self.compute_leverage_values(x)?);
897 self.cook_distances = Some(self.compute_cook_distances(x, y)?);
898
899 Ok(())
900 }
901
902 fn compute_influence_values(&self, x: &Array2<f64>, y: &Array1<f64>) -> Result<Array1<f64>> {
904 let n_samples = x.nrows();
905 let mut influence = Array1::zeros(n_samples);
906
907 let baseline_estimate = self.compute_robust_estimate(x, y)?;
909
910 for i in 0..n_samples {
912 let (x_reduced, y_reduced) = self.remove_observation(x, y, i);
913 let reduced_estimate = self.compute_robust_estimate(&x_reduced, &y_reduced)?;
914 influence[i] = (baseline_estimate - reduced_estimate).abs();
915 }
916
917 Ok(influence)
918 }
919
920 fn compute_leverage_values(&self, x: &Array2<f64>) -> Result<Array1<f64>> {
922 let n_samples = x.nrows();
923 let mut leverage = Array1::zeros(n_samples);
924
925 let (mean, _cov) = self.compute_robust_statistics(x);
927
928 for i in 0..n_samples {
929 let centered = &x.row(i) - &mean;
930 leverage[i] = centered.dot(¢ered); }
932
933 Ok(leverage)
934 }
935
936 fn compute_cook_distances(&self, x: &Array2<f64>, y: &Array1<f64>) -> Result<Array1<f64>> {
938 let n_samples = x.nrows();
939 let mut cook_distances = Array1::zeros(n_samples);
940
941 let influence = self.compute_influence_values(x, y)?;
943 let leverage = self.compute_leverage_values(x)?;
944
945 for i in 0..n_samples {
946 cook_distances[i] = influence[i] * leverage[i];
947 }
948
949 Ok(cook_distances)
950 }
951
952 fn remove_observation(
954 &self,
955 x: &Array2<f64>,
956 y: &Array1<f64>,
957 index: usize,
958 ) -> (Array2<f64>, Array1<f64>) {
959 let n_samples = x.nrows();
960 let n_features = x.ncols();
961
962 let mut x_reduced = Array2::zeros((n_samples - 1, n_features));
963 let mut y_reduced = Array1::zeros(n_samples - 1);
964
965 let mut reduced_idx = 0;
966 for i in 0..n_samples {
967 if i != index {
968 x_reduced.row_mut(reduced_idx).assign(&x.row(i));
969 y_reduced[reduced_idx] = y[i];
970 reduced_idx += 1;
971 }
972 }
973
974 (x_reduced, y_reduced)
975 }
976
977 fn compute_robust_estimate(&self, _x: &Array2<f64>, y: &Array1<f64>) -> Result<f64> {
979 Ok(y.mean().unwrap())
981 }
982
983 fn compute_robust_statistics(&self, x: &Array2<f64>) -> (Array1<f64>, Array2<f64>) {
985 let n_samples = x.nrows();
986 let n_features = x.ncols();
987
988 let mean = x.mean_axis(Axis(0)).unwrap();
989 let mut cov = Array2::zeros((n_features, n_features));
990
991 for sample in x.rows() {
992 let centered = &sample - &mean;
993 for i in 0..n_features {
994 for j in 0..n_features {
995 cov[[i, j]] += centered[i] * centered[j];
996 }
997 }
998 }
999 cov /= (n_samples - 1) as f64;
1000
1001 (mean, cov)
1002 }
1003
1004 pub fn get_influence_values(&self) -> Option<&Array1<f64>> {
1006 self.influence_values.as_ref()
1007 }
1008
1009 pub fn get_leverage_values(&self) -> Option<&Array1<f64>> {
1011 self.leverage_values.as_ref()
1012 }
1013
1014 pub fn get_cook_distances(&self) -> Option<&Array1<f64>> {
1016 self.cook_distances.as_ref()
1017 }
1018}
1019
#[allow(non_snake_case)]
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::Array2;

    // Fit + transform should yield an (n_samples, n_components) feature
    // matrix and populate both the outlier mask and the robust weights.
    #[test]
    fn test_robust_rbf_sampler() {
        let x = Array2::from_shape_vec((10, 3), (0..30).map(|i| i as f64).collect()).unwrap();

        let mut robust_rbf = RobustRBFSampler::new(50)
            .gamma(1.0)
            .with_config(RobustKernelConfig::default());

        robust_rbf.fit(&x).unwrap();
        let transformed = robust_rbf.transform(&x).unwrap();

        assert_eq!(transformed.shape(), &[10, 50]);
        assert!(robust_rbf.get_outlier_mask().is_some());
        assert!(robust_rbf.get_robust_weights().is_some());
    }

    // Every estimator variant with a dedicated detection routine should fit
    // without error on data containing two obvious outliers.
    #[test]
    fn test_robust_estimators() {
        let estimators = vec![
            RobustEstimator::Huber { delta: 1.345 },
            RobustEstimator::Tukey { c: 4.685 },
            RobustEstimator::MVE { alpha: 0.5 },
            RobustEstimator::MCD { alpha: 0.5 },
        ];

        // Rows 4 and 5 (values near 100) are the planted outliers.
        let x = Array2::from_shape_vec(
            (8, 2),
            vec![
                1.0, 2.0, 2.0, 3.0, 3.0, 4.0, 4.0, 5.0, 100.0, 100.0, 101.0, 101.0, 5.0, 6.0, 6.0,
                7.0,
            ],
        )
        .unwrap();

        for estimator in estimators {
            let config = RobustKernelConfig {
                estimator,
                ..Default::default()
            };

            let mut robust_rbf = RobustRBFSampler::new(10).with_config(config);

            let result = robust_rbf.fit(&x);
            assert!(result.is_ok());
        }
    }

    // Nystroem fit + transform should yield (n_samples, n_components)
    // features and expose the mask/weights from the internal RBF sampler.
    #[test]
    fn test_robust_nystroem() {
        let x = Array2::from_shape_vec((10, 3), (0..30).map(|i| i as f64).collect()).unwrap();

        let mut robust_nystroem = RobustNystroem::new(5)
            .gamma(1.0)
            .with_config(RobustKernelConfig::default());

        robust_nystroem.fit(&x).unwrap();
        let transformed = robust_nystroem.transform(&x).unwrap();

        assert_eq!(transformed.shape(), &[10, 5]);
        assert!(robust_nystroem.get_outlier_mask().is_some());
        assert!(robust_nystroem.get_robust_weights().is_some());
    }

    // The estimated breakdown point must be a valid fraction and must be
    // recorded in the results map.
    #[test]
    fn test_breakdown_point_analysis() {
        let x = Array2::from_shape_vec((20, 2), (0..40).map(|i| i as f64).collect()).unwrap();

        let mut analysis =
            BreakdownPointAnalysis::new().with_estimator(RobustEstimator::Huber { delta: 1.345 });

        let breakdown_point = analysis.analyze(&x).unwrap();
        assert!(breakdown_point >= 0.0 && breakdown_point <= 1.0);

        let breakdown_points = analysis.get_breakdown_points();
        assert!(!breakdown_points.is_empty());
    }

    // compute() should populate all three diagnostic arrays, one entry per sample.
    #[test]
    fn test_influence_function_diagnostics() {
        let x = Array2::from_shape_vec((10, 2), (0..20).map(|i| i as f64).collect()).unwrap();
        let y = Array1::from_vec((0..10).map(|i| i as f64).collect());

        let mut diagnostics = InfluenceFunctionDiagnostics::new()
            .with_estimator(RobustEstimator::Huber { delta: 1.345 });

        diagnostics.compute(&x, &y).unwrap();

        assert!(diagnostics.get_influence_values().is_some());
        assert!(diagnostics.get_leverage_values().is_some());
        assert!(diagnostics.get_cook_distances().is_some());

        let influence = diagnostics.get_influence_values().unwrap();
        assert_eq!(influence.len(), 10);
    }

    // Every loss with a weight-function implementation should fit cleanly.
    #[test]
    fn test_robust_loss_functions() {
        let losses = vec![
            RobustLoss::Huber { delta: 1.345 },
            RobustLoss::Tukey { c: 4.685 },
            RobustLoss::Cauchy { sigma: 1.0 },
            RobustLoss::Welsch { sigma: 1.0 },
            RobustLoss::Fair { c: 1.0 },
        ];

        let x = Array2::from_shape_vec((8, 2), (0..16).map(|i| i as f64).collect()).unwrap();

        for loss in losses {
            let config = RobustKernelConfig {
                loss,
                ..Default::default()
            };

            let mut robust_rbf = RobustRBFSampler::new(10).with_config(config);

            let result = robust_rbf.fit(&x);
            assert!(result.is_ok());
        }
    }

    // Planting two extreme rows should cause at least one of them to be
    // flagged by the default (Huber) outlier detection.
    #[test]
    fn test_contamination_resistance() {
        let mut x =
            Array2::from_shape_vec((20, 2), (0..40).map(|i| i as f64 / 10.0).collect()).unwrap();

        // Rows 18 and 19 are the planted gross outliers.
        x[[18, 0]] = 100.0;
        x[[18, 1]] = 100.0;
        x[[19, 0]] = -100.0;
        x[[19, 1]] = -100.0;

        let config = RobustKernelConfig {
            contamination: 0.2,
            ..Default::default()
        };

        let mut robust_rbf = RobustRBFSampler::new(10).with_config(config);

        robust_rbf.fit(&x).unwrap();

        let outlier_mask = robust_rbf.get_outlier_mask().unwrap();
        assert!(outlier_mask[18] || outlier_mask[19]);
    }
}