use scirs2_core::ndarray::{Array1, Array2, Axis};
use scirs2_core::random::{thread_rng, RandNormal, Rng};
use sklears_core::{
    error::{Result, SklearsError},
    types::Float,
};

/// Configuration shared by the robust decomposition estimators.
#[derive(Debug, Clone)]
pub struct RobustConfig {
    /// Maximum number of iterations for the iterative re-weighting loops.
    pub max_iterations: usize,
    /// Convergence tolerance on successive iterates.
    pub tolerance: Float,
    /// Threshold used when flagging outliers.
    pub outlier_threshold: Float,
    /// Tuning constant that scales residuals before applying the loss.
    pub tuning_constant: Float,
    /// Robust loss function used to downweight large residuals.
    pub loss_function: LossFunction,
}

/// Robust loss functions used to compute M-estimator weights.
#[derive(Debug, Clone, Copy)]
pub enum LossFunction {
    /// Quadratic near zero, linear in the tails.
    Huber,
    /// Tukey's biweight; zero weight beyond the tuning constant.
    Tukey,
    /// Absolute-deviation loss.
    L1,
    /// Cauchy loss with smoothly decaying weights.
    Cauchy,
    /// Welsch loss with exponentially decaying weights.
    Welsch,
}

impl Default for RobustConfig {
    fn default() -> Self {
        Self {
            max_iterations: 100,
            tolerance: 1e-6,
            outlier_threshold: 2.5,
            // 1.345 is the standard Huber tuning constant (~95% Gaussian efficiency).
            tuning_constant: 1.345,
            loss_function: LossFunction::Huber,
        }
    }
}

/// Robust PCA via iteratively re-weighted component estimation.
pub struct RobustPCA {
    config: RobustConfig,
    n_components: Option<usize>,
    components_: Option<Array2<Float>>,
    explained_variance_: Option<Array1<Float>>,
    mean_: Option<Array1<Float>>,
    outlier_weights_: Option<Array2<Float>>,
}

impl RobustPCA {
    /// Creates a robust PCA estimator with the default configuration.
    pub fn new() -> Self {
        Self {
            config: RobustConfig::default(),
            n_components: None,
            components_: None,
            explained_variance_: None,
            mean_: None,
            outlier_weights_: None,
        }
    }

    /// Sets the number of components to extract.
    pub fn n_components(mut self, n_components: usize) -> Self {
        self.n_components = Some(n_components);
        self
    }

    /// Sets the robust loss function.
    pub fn loss_function(mut self, loss_function: LossFunction) -> Self {
        self.config.loss_function = loss_function;
        self
    }

    /// Sets the tuning constant for the robust loss.
    pub fn tuning_constant(mut self, tuning_constant: Float) -> Self {
        self.config.tuning_constant = tuning_constant;
        self
    }

    /// Sets the outlier threshold.
    pub fn outlier_threshold(mut self, threshold: Float) -> Self {
        self.config.outlier_threshold = threshold;
        self
    }

    /// Fits the model and transforms the input data in one step.
    pub fn fit_transform(&mut self, data: &Array2<Float>) -> Result<RobustPCAResult> {
        self.fit(data)?;
        self.transform(data)
    }

    /// Fits robust principal components using iteratively re-weighted updates.
    pub fn fit(&mut self, data: &Array2<Float>) -> Result<()> {
        let (n_samples, n_features) = data.dim();
        let n_components = self.n_components.unwrap_or(n_features.min(n_samples));

        if n_components > n_features.min(n_samples) {
            return Err(SklearsError::InvalidInput(
                "Number of components cannot exceed min(n_samples, n_features)".to_string(),
            ));
        }

        // Center the data around a robust (M-estimated) mean.
        let robust_mean = self.compute_robust_mean(data)?;
        let centered_data = data - &robust_mean.view().insert_axis(Axis(0));

        // Random initialization of the components.
        let mut rng = thread_rng();
        let mut components = Array2::zeros((n_components, n_features));
        for elem in components.iter_mut() {
            *elem = rng.sample(RandNormal::new(0.0, 1.0).unwrap());
        }

        self.orthogonalize_components(&mut components);

        let mut prev_components = components.clone();
        let mut outlier_weights = Array2::ones((n_samples, n_components));

        for _iter in 0..self.config.max_iterations {
            // Update each component using the current robust weights.
            for comp_idx in 0..n_components {
                let weights = outlier_weights.column(comp_idx);
                let updated_component =
                    self.update_component(&centered_data, &weights, comp_idx)?;
                components.row_mut(comp_idx).assign(&updated_component);
            }

            self.orthogonalize_components(&mut components);

            // Re-estimate sample weights from the new residuals.
            outlier_weights = self.compute_outlier_weights(&centered_data, &components)?;

            let component_change = self.compute_component_change(&components, &prev_components);
            if component_change < self.config.tolerance {
                break;
            }

            prev_components = components.clone();
        }

        let explained_variance = self.compute_explained_variance(&centered_data, &components)?;

        self.components_ = Some(components);
        self.explained_variance_ = Some(explained_variance);
        self.mean_ = Some(robust_mean);
        self.outlier_weights_ = Some(outlier_weights);

        Ok(())
    }

    pub fn transform(&self, data: &Array2<Float>) -> Result<RobustPCAResult> {
        let components = self
            .components_
            .as_ref()
            .ok_or_else(|| SklearsError::InvalidInput("Model must be fitted first".to_string()))?;

        let mean = self
            .mean_
            .as_ref()
            .ok_or_else(|| SklearsError::InvalidInput("Model must be fitted first".to_string()))?;

        let explained_variance = self
            .explained_variance_
            .as_ref()
            .ok_or_else(|| SklearsError::InvalidInput("Model must be fitted first".to_string()))?;

        let outlier_weights = self
            .outlier_weights_
            .as_ref()
            .ok_or_else(|| SklearsError::InvalidInput("Model must be fitted first".to_string()))?;

        let centered_data = data - &mean.view().insert_axis(Axis(0));

        let transformed = centered_data.dot(&components.t());

        Ok(RobustPCAResult {
            transformed_data: transformed,
            components: components.clone(),
            explained_variance: explained_variance.clone(),
            mean: mean.clone(),
            outlier_weights: outlier_weights.clone(),
        })
    }

    /// Computes an M-estimate of the mean via iteratively re-weighted averaging.
    fn compute_robust_mean(&self, data: &Array2<Float>) -> Result<Array1<Float>> {
        let (n_samples, n_features) = data.dim();
        let mut mean = data.mean_axis(Axis(0)).unwrap();

        for _ in 0..self.config.max_iterations {
            let mut new_mean = Array1::zeros(n_features);
            let mut weight_sum = 0.0;

            for i in 0..n_samples {
                let residual = &data.row(i) - &mean;
                let residual_norm = residual.dot(&residual).sqrt();
                let weight = self.robust_weight(residual_norm);

                new_mean += &(weight * &data.row(i));
                weight_sum += weight;
            }

            if weight_sum > 0.0 {
                new_mean /= weight_sum;
            }

            let change = (&new_mean - &mean).dot(&(&new_mean - &mean)).sqrt();
            mean = new_mean;

            if change < self.config.tolerance {
                break;
            }
        }

        Ok(mean)
    }

    fn update_component(
        &self,
        data: &Array2<Float>,
        weights: &scirs2_core::ndarray::ArrayView1<Float>,
        _comp_idx: usize,
    ) -> Result<Array1<Float>> {
        let (n_samples, n_features) = data.dim();
        let mut component = Array1::zeros(n_features);
        let mut weight_sum = 0.0;

        for i in 0..n_samples {
            let weight = weights[i];
            let sample = data.row(i);
            component += &(weight * &sample);
            weight_sum += weight;
        }

        if weight_sum > 0.0 {
            component /= weight_sum;
        }

        let norm = component.dot(&component).sqrt();
        if norm > 1e-12 {
            component /= norm;
        }

        Ok(component)
    }

    /// Orthonormalizes the components with Gram-Schmidt.
    fn orthogonalize_components(&self, components: &mut Array2<Float>) {
        let n_components = components.nrows();

        for i in 0..n_components {
            let prev_components: Vec<Array1<Float>> =
                (0..i).map(|j| components.row(j).to_owned()).collect();

            let mut current = components.row_mut(i);
            let norm = current.dot(&current).sqrt();
            if norm > 1e-12 {
                current /= norm;
            }

            // Remove projections onto previous components, renormalizing each time.
            for prev in prev_components {
                let projection = current.dot(&prev);
                current -= &(projection * &prev);

                let norm = current.dot(&current).sqrt();
                if norm > 1e-12 {
                    current /= norm;
                }
            }
        }
    }

    /// Computes per-sample, per-component robust weights from reconstruction residuals.
    fn compute_outlier_weights(
        &self,
        data: &Array2<Float>,
        components: &Array2<Float>,
    ) -> Result<Array2<Float>> {
        let (n_samples, _) = data.dim();
        let n_components = components.nrows();
        let mut weights = Array2::zeros((n_samples, n_components));

        for i in 0..n_samples {
            let sample = data.row(i);

            for j in 0..n_components {
                let component = components.row(j);
                let projection = sample.dot(&component);
                let reconstructed = projection * &component;
                let residual = &sample - &reconstructed;
                let residual_norm = residual.dot(&residual).sqrt();

                weights[[i, j]] = self.robust_weight(residual_norm);
            }
        }

        Ok(weights)
    }

    /// Maps a residual magnitude to an M-estimator weight under the chosen loss.
    fn robust_weight(&self, residual: Float) -> Float {
        let standardized_residual = residual / self.config.tuning_constant;

        match self.config.loss_function {
            LossFunction::Huber => {
                if standardized_residual.abs() <= 1.0 {
                    1.0
                } else {
                    1.0 / standardized_residual.abs()
                }
            }
            LossFunction::Tukey => {
                if standardized_residual.abs() <= 1.0 {
                    let x2 = standardized_residual * standardized_residual;
                    (1.0 - x2) * (1.0 - x2)
                } else {
                    0.0
                }
            }
            LossFunction::L1 => {
                if standardized_residual.abs() > 1e-12 {
                    1.0 / standardized_residual.abs()
                } else {
                    1.0
                }
            }
            LossFunction::Cauchy => 1.0 / (1.0 + standardized_residual * standardized_residual),
            LossFunction::Welsch => {
                let x2 = standardized_residual * standardized_residual;
                (-x2).exp()
            }
        }
    }

    fn compute_explained_variance(
        &self,
        data: &Array2<Float>,
        components: &Array2<Float>,
    ) -> Result<Array1<Float>> {
        let n_components = components.nrows();
        let mut explained_variance = Array1::zeros(n_components);

        let total_variance = self.compute_robust_variance(data);

        for i in 0..n_components {
            let component = components.row(i);
            let projections = data.dot(&component);
            let component_variance = self.compute_robust_variance_1d(&projections);
            explained_variance[i] = component_variance / total_variance;
        }

        Ok(explained_variance)
    }

    fn compute_robust_variance(&self, data: &Array2<Float>) -> Float {
        let (_n_samples, n_features) = data.dim();
        let mut total_variance = 0.0;

        for j in 0..n_features {
            let column = data.column(j);
            total_variance += self.compute_robust_variance_1d(&column.to_owned());
        }

        total_variance / n_features as Float
    }

    /// Robust variance via the median absolute deviation (MAD).
    fn compute_robust_variance_1d(&self, data: &Array1<Float>) -> Float {
        let n = data.len();
        if n < 2 {
            return 0.0;
        }

        let median = self.compute_median(data);
        let mut abs_deviations: Vec<Float> = data.iter().map(|&x| (x - median).abs()).collect();
        abs_deviations.sort_by(|a, b| a.partial_cmp(b).unwrap());

        let mad = if abs_deviations.len() % 2 == 0 {
            let mid = abs_deviations.len() / 2;
            (abs_deviations[mid - 1] + abs_deviations[mid]) / 2.0
        } else {
            abs_deviations[abs_deviations.len() / 2]
        };

        // (1.4826 * MAD)^2 is a consistent variance estimate under Gaussian noise.
        let sigma = 1.4826 * mad;
        sigma * sigma
    }

    fn compute_median(&self, data: &Array1<Float>) -> Float {
        let mut sorted_data: Vec<Float> = data.iter().cloned().collect();
        sorted_data.sort_by(|a, b| a.partial_cmp(b).unwrap());

        let n = sorted_data.len();
        if n % 2 == 0 {
            (sorted_data[n / 2 - 1] + sorted_data[n / 2]) / 2.0
        } else {
            sorted_data[n / 2]
        }
    }

    fn compute_component_change(&self, current: &Array2<Float>, previous: &Array2<Float>) -> Float {
        let diff = current - previous;
        (diff.iter().map(|&x| x * x).sum::<Float>()).sqrt()
    }

    pub fn components(&self) -> Option<&Array2<Float>> {
        self.components_.as_ref()
    }

    pub fn explained_variance(&self) -> Option<&Array1<Float>> {
        self.explained_variance_.as_ref()
    }

    pub fn mean(&self) -> Option<&Array1<Float>> {
        self.mean_.as_ref()
    }

    pub fn outlier_weights(&self) -> Option<&Array2<Float>> {
        self.outlier_weights_.as_ref()
    }
}

impl Default for RobustPCA {
    fn default() -> Self {
        Self::new()
    }
}

/// Output of a fitted [`RobustPCA`] transform.
#[derive(Debug, Clone)]
pub struct RobustPCAResult {
    /// Data projected onto the robust components.
    pub transformed_data: Array2<Float>,
    /// Row-wise principal components.
    pub components: Array2<Float>,
    /// Robust explained-variance ratio per component.
    pub explained_variance: Array1<Float>,
    /// Robust mean used for centering.
    pub mean: Array1<Float>,
    /// Per-sample, per-component robust weights (small values flag outliers).
    pub outlier_weights: Array2<Float>,
}

impl RobustPCAResult {
    /// Maps transformed data back to the original feature space.
    pub fn inverse_transform(&self, transformed_data: &Array2<Float>) -> Array2<Float> {
        let reconstructed = transformed_data.dot(&self.components);
        reconstructed + self.mean.view().insert_axis(Axis(0))
    }

    /// Flags samples whose minimum component weight falls below `threshold`.
    pub fn identify_outliers(&self, threshold: Float) -> Array1<bool> {
        let (n_samples, _) = self.outlier_weights.dim();
        let mut is_outlier = Array1::from_elem(n_samples, false);

        for i in 0..n_samples {
            let min_weight = self
                .outlier_weights
                .row(i)
                .iter()
                .cloned()
                .fold(Float::INFINITY, Float::min);
            if min_weight < threshold {
                is_outlier[i] = true;
            }
        }

        is_outlier
    }

    /// Computes the per-sample Euclidean reconstruction error.
    pub fn reconstruction_error(&self, original_data: &Array2<Float>) -> Result<Array1<Float>> {
        let reconstructed = self.inverse_transform(&self.transformed_data);
        let (n_samples, _) = original_data.dim();
        let mut errors = Array1::zeros(n_samples);

        for i in 0..n_samples {
            let original_row = original_data.row(i);
            let reconstructed_row = reconstructed.row(i);
            let diff = &original_row - &reconstructed_row;
            errors[i] = diff.dot(&diff).sqrt();
        }

        Ok(errors)
    }
}

/// Robust low-rank factorization `A ≈ U Vᵀ` fitted with M-estimator weights.
pub struct MEstimatorDecomposition {
    config: RobustConfig,
    rank: Option<usize>,
}

impl MEstimatorDecomposition {
    pub fn new() -> Self {
        Self {
            config: RobustConfig::default(),
            rank: None,
        }
    }

    /// Sets the target rank of the factorization.
    pub fn rank(mut self, rank: usize) -> Self {
        self.rank = Some(rank);
        self
    }

    /// Sets the robust loss function.
    pub fn loss_function(mut self, loss_function: LossFunction) -> Self {
        self.config.loss_function = loss_function;
        self
    }

    /// Decomposes `matrix` into low-rank factors by alternating robust updates.
    pub fn decompose(&self, matrix: &Array2<Float>) -> Result<MEstimatorResult> {
        let (m, n) = matrix.dim();
        let rank = self.rank.unwrap_or((m.min(n) / 2).max(1));

        // Random initialization of both factors.
        let mut rng = thread_rng();
        let mut u = Array2::zeros((m, rank));
        let mut v = Array2::zeros((n, rank));

        for elem in u.iter_mut() {
            *elem = rng.sample(RandNormal::new(0.0, 1.0).unwrap());
        }
        for elem in v.iter_mut() {
            *elem = rng.sample(RandNormal::new(0.0, 1.0).unwrap());
        }

        for _ in 0..self.config.max_iterations {
            let prev_u = u.clone();
            let prev_v = v.clone();

            // Alternate weighted least-squares updates of U and V.
            u = self.update_factor_u(matrix, &u, &v)?;
            v = self.update_factor_v(matrix, &u, &v)?;

            let u_change = (&u - &prev_u).iter().map(|&x| x * x).sum::<Float>().sqrt();
            let v_change = (&v - &prev_v).iter().map(|&x| x * x).sum::<Float>().sqrt();

            if u_change + v_change < self.config.tolerance {
                break;
            }
        }

        let reconstruction = u.dot(&v.t());
        let residuals = matrix - &reconstruction;
        let weights = self.compute_residual_weights(&residuals);

        Ok(MEstimatorResult {
            u_factor: u,
            v_factor: v,
            reconstruction,
            residuals,
            weights,
        })
    }

    fn update_factor_u(
        &self,
        matrix: &Array2<Float>,
        u: &Array2<Float>,
        v: &Array2<Float>,
    ) -> Result<Array2<Float>> {
        let (m, rank) = u.dim();
        let mut new_u = Array2::zeros((m, rank));

        for i in 0..m {
            for r in 0..rank {
                let mut numerator = 0.0;
                let mut denominator = 0.0;

                for j in 0..matrix.ncols() {
                    let residual = matrix[[i, j]] - u.row(i).dot(&v.row(j));
                    let weight = self.robust_weight(residual.abs());

                    numerator += weight * matrix[[i, j]] * v[[j, r]];
                    denominator += weight * v[[j, r]] * v[[j, r]];
                }

                if denominator > 1e-12 {
                    new_u[[i, r]] = numerator / denominator;
                }
            }
        }

        Ok(new_u)
    }

    fn update_factor_v(
        &self,
        matrix: &Array2<Float>,
        u: &Array2<Float>,
        v: &Array2<Float>,
    ) -> Result<Array2<Float>> {
        let (n, rank) = v.dim();
        let mut new_v = Array2::zeros((n, rank));

        for j in 0..n {
            for r in 0..rank {
                let mut numerator = 0.0;
                let mut denominator = 0.0;

                for i in 0..matrix.nrows() {
                    let residual = matrix[[i, j]] - u.row(i).dot(&v.row(j));
                    let weight = self.robust_weight(residual.abs());

                    numerator += weight * matrix[[i, j]] * u[[i, r]];
                    denominator += weight * u[[i, r]] * u[[i, r]];
                }

                if denominator > 1e-12 {
                    new_v[[j, r]] = numerator / denominator;
                }
            }
        }

        Ok(new_v)
    }

    fn compute_residual_weights(&self, residuals: &Array2<Float>) -> Array2<Float> {
        residuals.mapv(|r| self.robust_weight(r.abs()))
    }

    fn robust_weight(&self, residual: Float) -> Float {
        let standardized_residual = residual / self.config.tuning_constant;

        match self.config.loss_function {
            LossFunction::Huber => {
                if standardized_residual.abs() <= 1.0 {
                    1.0
                } else {
                    1.0 / standardized_residual.abs()
                }
            }
            LossFunction::Tukey => {
                if standardized_residual.abs() <= 1.0 {
                    let x2 = standardized_residual * standardized_residual;
                    (1.0 - x2) * (1.0 - x2)
                } else {
                    0.0
                }
            }
            LossFunction::L1 => {
                if standardized_residual.abs() > 1e-12 {
                    1.0 / standardized_residual.abs()
                } else {
                    1.0
                }
            }
            LossFunction::Cauchy => 1.0 / (1.0 + standardized_residual * standardized_residual),
            LossFunction::Welsch => {
                let x2 = standardized_residual * standardized_residual;
                (-x2).exp()
            }
        }
    }
}

impl Default for MEstimatorDecomposition {
    fn default() -> Self {
        Self::new()
    }
}

/// Output of [`MEstimatorDecomposition::decompose`].
#[derive(Debug, Clone)]
pub struct MEstimatorResult {
    /// Left factor `U` (m × rank).
    pub u_factor: Array2<Float>,
    /// Right factor `V` (n × rank).
    pub v_factor: Array2<Float>,
    /// Low-rank reconstruction `U Vᵀ`.
    pub reconstruction: Array2<Float>,
    /// Element-wise residuals `A − U Vᵀ`.
    pub residuals: Array2<Float>,
    /// Element-wise robust weights (small values flag outlying entries).
    pub weights: Array2<Float>,
}

impl MEstimatorResult {
    /// Frobenius norm of the residual matrix.
    pub fn residual_norm(&self) -> Float {
        self.residuals.iter().map(|&x| x * x).sum::<Float>().sqrt()
    }

    /// Frobenius norm of the element-wise weighted residuals.
    pub fn weighted_residual_norm(&self) -> Float {
        let weighted_residuals = &self.residuals * &self.weights;
        weighted_residuals
            .iter()
            .map(|&x| x * x)
            .sum::<Float>()
            .sqrt()
    }

    /// Flags matrix entries whose robust weight falls below `threshold`.
    pub fn identify_outlier_elements(&self, threshold: Float) -> Array2<bool> {
        self.weights.mapv(|w| w < threshold)
    }
}

/// Empirical breakdown-point analysis for robust PCA.
pub struct BreakdownPointAnalysis {
    #[allow(dead_code)]
    config: RobustConfig,
}

impl BreakdownPointAnalysis {
    pub fn new() -> Self {
        Self {
            config: RobustConfig::default(),
        }
    }

    /// Measures how component directions degrade as contamination increases.
    pub fn empirical_breakdown_point(
        &self,
        data: &Array2<Float>,
        contamination_levels: &[Float],
    ) -> Result<BreakdownResult> {
        let (n_samples, _) = data.dim();
        let mut breakdown_errors = Vec::new();
        let mut breakdown_levels = Vec::new();

        for &contamination_level in contamination_levels {
            let n_outliers = (contamination_level * n_samples as Float) as usize;

            if n_outliers >= n_samples {
                continue;
            }

            // Contaminate a copy of the data, then compare robust and standard fits.
            let contaminated_data = self.add_outliers(data, n_outliers)?;

            let mut robust_pca = RobustPCA::new().n_components(2);
            let robust_result = robust_pca.fit_transform(&contaminated_data)?;

            let standard_result = self.standard_pca(&contaminated_data, 2)?;

            let direction_error =
                self.compute_direction_error(&robust_result.components, &standard_result);

            breakdown_errors.push(direction_error);
            breakdown_levels.push(contamination_level);
        }

        Ok(BreakdownResult {
            contamination_levels: Array1::from_vec(breakdown_levels),
            direction_errors: Array1::from_vec(breakdown_errors),
        })
    }

    /// Replaces the first `n_outliers` rows with points far from the data center.
    fn add_outliers(&self, data: &Array2<Float>, n_outliers: usize) -> Result<Array2<Float>> {
        let (n_samples, n_features) = data.dim();
        let mut contaminated_data = data.clone();

        let data_center = data.mean_axis(Axis(0)).unwrap();
        let data_scale = self.estimate_scale(data);
        let mut rng = thread_rng();

        for i in 0..n_outliers.min(n_samples) {
            for j in 0..n_features {
                contaminated_data[[i, j]] =
                    data_center[j] + 10.0 * data_scale * (2.0 * rng.gen::<Float>() - 1.0);
            }
        }

        Ok(contaminated_data)
    }

    /// Estimates scale as the median distance of samples from the mean.
    fn estimate_scale(&self, data: &Array2<Float>) -> Float {
        let center = data.mean_axis(Axis(0)).unwrap();
        let distances: Vec<Float> = data
            .axis_iter(Axis(0))
            .map(|row| {
                let diff = &row - &center;
                diff.dot(&diff).sqrt()
            })
            .collect();

        let mut sorted_distances = distances;
        sorted_distances.sort_by(|a, b| a.partial_cmp(b).unwrap());

        let n = sorted_distances.len();
        if n % 2 == 0 {
            (sorted_distances[n / 2 - 1] + sorted_distances[n / 2]) / 2.0
        } else {
            sorted_distances[n / 2]
        }
    }

    /// Baseline for comparison; a simplified stand-in that returns axis-aligned
    /// directions rather than true covariance eigenvectors.
    fn standard_pca(&self, data: &Array2<Float>, n_components: usize) -> Result<Array2<Float>> {
        let (n_samples, n_features) = data.dim();

        let mean = data.mean_axis(Axis(0)).unwrap();
        let centered_data = data - &mean.view().insert_axis(Axis(0));

        let _cov_matrix = centered_data.t().dot(&centered_data) / (n_samples - 1) as Float;

        // Placeholder components: the first `n_components` rows of the identity.
        let eye = Array2::eye(n_features);
        let components = eye
            .slice(scirs2_core::ndarray::s![0..n_components, ..])
            .to_owned();

        Ok(components)
    }

    /// Mean principal angle between corresponding robust and standard directions.
    fn compute_direction_error(
        &self,
        robust_components: &Array2<Float>,
        standard_components: &Array2<Float>,
    ) -> Float {
        let n_components = robust_components.nrows().min(standard_components.nrows());
        let mut total_error = 0.0;

        for i in 0..n_components {
            let robust_dir = robust_components.row(i);
            let standard_dir = standard_components.row(i);

            let dot_product = robust_dir.dot(&standard_dir);
            let robust_norm = robust_dir.dot(&robust_dir).sqrt();
            let standard_norm = standard_dir.dot(&standard_dir).sqrt();

            if robust_norm > 1e-12 && standard_norm > 1e-12 {
                // Directions are sign-invariant, so compare |cos| of the angle.
                let cos_angle = dot_product / (robust_norm * standard_norm);
                let angle = cos_angle.abs().acos();
                total_error += angle;
            }
        }

        total_error / n_components as Float
    }
}

impl Default for BreakdownPointAnalysis {
    fn default() -> Self {
        Self::new()
    }
}

/// Direction errors observed at each contamination level.
#[derive(Debug, Clone)]
pub struct BreakdownResult {
    /// Fractions of contaminated samples that were tested.
    pub contamination_levels: Array1<Float>,
    /// Mean angular error between robust and baseline directions.
    pub direction_errors: Array1<Float>,
}

impl BreakdownResult {
    /// Returns the smallest contamination level whose error exceeds the threshold.
    pub fn breakdown_point(&self, error_threshold: Float) -> Option<Float> {
        for (level, error) in self
            .contamination_levels
            .iter()
            .zip(self.direction_errors.iter())
        {
            if *error > error_threshold {
                return Some(*level);
            }
        }
        None
    }

    /// Scores robustness as the inverse of the error-vs-contamination area
    /// (trapezoidal rule); larger is better.
    pub fn robustness_score(&self) -> Float {
        let n = self.contamination_levels.len();
        if n < 2 {
            return 0.0;
        }

        let mut area = 0.0;
        for i in 1..n {
            let dx = self.contamination_levels[i] - self.contamination_levels[i - 1];
            let avg_error = (self.direction_errors[i] + self.direction_errors[i - 1]) / 2.0;
            area += dx * avg_error;
        }

        1.0 / (area + 1e-12)
    }
}

#[allow(non_snake_case)]
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::Array2;

    #[test]
    fn test_robust_pca_basic() {
        let data = Array2::from_shape_vec(
            (4, 3),
            vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 2.0, 3.0, 4.0],
        )
        .unwrap();

        let mut rpca = RobustPCA::new().n_components(2);
        let result = rpca.fit_transform(&data).unwrap();

        assert_eq!(result.transformed_data.dim(), (4, 2));
        assert_eq!(result.components.dim(), (2, 3));
        assert_eq!(result.explained_variance.len(), 2);
        assert_eq!(result.outlier_weights.dim(), (4, 2));
    }
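
    // A minimal sketch of an invariant of the fit above: each explained-variance
    // entry is a ratio of MAD-based variances, so it should never be negative.
    #[test]
    fn test_explained_variance_nonnegative() {
        let data = Array2::from_shape_vec(
            (4, 3),
            vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 2.0, 3.0, 4.0],
        )
        .unwrap();

        let mut rpca = RobustPCA::new().n_components(2);
        let result = rpca.fit_transform(&data).unwrap();

        for v in result.explained_variance.iter() {
            assert!(*v >= 0.0);
        }
    }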

    #[test]
    fn test_robust_pca_with_outliers() {
        let data = Array2::from_shape_vec(
            (5, 3),
            vec![
                1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 2.0, 3.0, 4.0, 100.0, 100.0, 100.0,
            ],
        )
        .unwrap();

        let mut rpca = RobustPCA::new()
            .n_components(2)
            .loss_function(LossFunction::Tukey);

        let result = rpca.fit_transform(&data).unwrap();

        // The last row is a gross outlier and should receive a small weight.
        let outlier_weights = result.outlier_weights.row(4);
        let min_weight = outlier_weights
            .iter()
            .cloned()
            .fold(Float::INFINITY, Float::min);
        assert!(min_weight < 0.5);

        let outliers = result.identify_outliers(0.5);
        assert!(outliers[4]);
    }

    #[test]
    fn test_m_estimator_decomposition() {
        let matrix =
            Array2::from_shape_vec((3, 3), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0])
                .unwrap();

        let m_est = MEstimatorDecomposition::new()
            .rank(2)
            .loss_function(LossFunction::Huber);

        let result = m_est.decompose(&matrix).unwrap();

        assert_eq!(result.u_factor.dim(), (3, 2));
        assert_eq!(result.v_factor.dim(), (3, 2));
        assert_eq!(result.reconstruction.dim(), (3, 3));
        assert_eq!(result.residuals.dim(), (3, 3));
        assert_eq!(result.weights.dim(), (3, 3));
    }
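
    // A minimal sketch of a weight property: Huber weights lie in (0, 1], so
    // element-wise weighting can only shrink the Frobenius norm of the residuals.
    #[test]
    fn test_weighted_residual_norm_bounded() {
        let matrix =
            Array2::from_shape_vec((3, 3), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0])
                .unwrap();

        let m_est = MEstimatorDecomposition::new()
            .rank(2)
            .loss_function(LossFunction::Huber);

        let result = m_est.decompose(&matrix).unwrap();
        assert!(result.weighted_residual_norm() <= result.residual_norm() + 1e-12);
    }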

    #[test]
    fn test_loss_functions() {
        let _config = RobustConfig::default();
        let rpca = RobustPCA::new();

        let residuals = vec![0.5, 1.0, 1.5, 2.0, 5.0];

        for residual in residuals {
            let huber_weight = rpca.robust_weight(residual);
            assert!(huber_weight > 0.0);
            assert!(huber_weight <= 1.0);
        }
    }
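
    // A minimal sketch comparing the losses on one large residual: with the
    // default tuning constant (1.345), a residual of 5.0 standardizes to ~3.7,
    // so Tukey cuts the weight to zero while the others stay positive, ordered
    // Welsch < Cauchy < Huber < 1.
    #[test]
    fn test_loss_function_ordering_on_large_residual() {
        let residual = 5.0;

        let huber = RobustPCA::new()
            .loss_function(LossFunction::Huber)
            .robust_weight(residual);
        let tukey = RobustPCA::new()
            .loss_function(LossFunction::Tukey)
            .robust_weight(residual);
        let cauchy = RobustPCA::new()
            .loss_function(LossFunction::Cauchy)
            .robust_weight(residual);
        let welsch = RobustPCA::new()
            .loss_function(LossFunction::Welsch)
            .robust_weight(residual);

        assert_eq!(tukey, 0.0);
        assert!(welsch < cauchy && cauchy < huber && huber < 1.0);
    }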

    #[test]
    fn test_breakdown_analysis() {
        let data = Array2::from_shape_vec(
            (10, 3),
            vec![
                1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
                16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0,
                30.0,
            ],
        )
        .unwrap();

        let breakdown_analysis = BreakdownPointAnalysis::new();
        let contamination_levels = vec![0.1, 0.2, 0.3];

        let result = breakdown_analysis
            .empirical_breakdown_point(&data, &contamination_levels)
            .unwrap();

        assert_eq!(
            result.contamination_levels.len(),
            result.direction_errors.len()
        );
        assert!(result.contamination_levels.len() <= contamination_levels.len());

        let score = result.robustness_score();
        assert!(score > 0.0);
    }
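
    // A minimal sketch of `breakdown_point` on a hand-built result: it returns
    // the first contamination level whose direction error exceeds the threshold.
    #[test]
    fn test_breakdown_point_threshold() {
        let result = BreakdownResult {
            contamination_levels: Array1::from_vec(vec![0.1, 0.2, 0.3]),
            direction_errors: Array1::from_vec(vec![0.05, 0.4, 0.9]),
        };

        assert_eq!(result.breakdown_point(0.3), Some(0.2));
        assert_eq!(result.breakdown_point(1.0), None);
    }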

    #[test]
    fn test_robust_mean_computation() {
        let data = Array2::from_shape_vec(
            (5, 2),
            vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 2.0, 3.0, 100.0, 100.0],
        )
        .unwrap();

        let rpca = RobustPCA::new();
        let robust_mean = rpca.compute_robust_mean(&data).unwrap();

        // The robust mean should be pulled far less toward the (100, 100)
        // outlier than the arithmetic mean would be.
        assert!(robust_mean[0] < 50.0);
        assert!(robust_mean[1] < 50.0);
    }
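
    // A minimal sketch of the median helper: odd-length input returns the middle
    // value, even-length input averages the two middle values.
    #[test]
    fn test_compute_median() {
        let rpca = RobustPCA::new();

        let odd = Array1::from_vec(vec![5.0, 1.0, 3.0]);
        assert_eq!(rpca.compute_median(&odd), 3.0);

        let even = Array1::from_vec(vec![4.0, 1.0, 3.0, 2.0]);
        assert_eq!(rpca.compute_median(&even), 2.5);
    }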

    #[test]
    fn test_reconstruction_error() {
        let data = Array2::from_shape_vec(
            (4, 3),
            vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 2.0, 3.0, 4.0],
        )
        .unwrap();

        let mut rpca = RobustPCA::new().n_components(2);
        let result = rpca.fit_transform(&data).unwrap();

        let errors = result.reconstruction_error(&data).unwrap();
        assert_eq!(errors.len(), 4);

        for error in errors.iter() {
            assert!(*error >= 0.0);
        }
    }
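
    // A minimal sketch of the round trip: inverse_transform maps (n_samples,
    // n_components) scores back to the original (n_samples, n_features) space.
    #[test]
    fn test_inverse_transform_shape() {
        let data = Array2::from_shape_vec(
            (4, 3),
            vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 2.0, 3.0, 4.0],
        )
        .unwrap();

        let mut rpca = RobustPCA::new().n_components(2);
        let result = rpca.fit_transform(&data).unwrap();

        let reconstructed = result.inverse_transform(&result.transformed_data);
        assert_eq!(reconstructed.dim(), (4, 3));
    }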
}