1use crate::kernels::Kernel;
38use crate::utils;
39use scirs2_core::ndarray::{Array1, Array2, Axis};
40use sklears_core::error::{Result as SklResult, SklearsError};
42use sklears_core::traits::{Estimator, Fit, Predict};
43use std::f64::consts::PI;
44
/// Typestate marker for a regressor that has not yet been fitted to data.
#[derive(Debug, Clone)]
pub struct Untrained;
48
/// Typestate payload captured by a successfully fitted robust GP regressor.
#[derive(Debug, Clone)]
pub struct Trained {
    /// Kernel used for fitting and later predictions.
    pub kernel: Box<dyn Kernel>,
    /// Likelihood the model was fitted with.
    pub robust_likelihood: RobustLikelihood,
    /// Owned copy of the training inputs and targets `(X, y)`.
    pub training_data: (Array2<f64>, Array1<f64>),
    /// Dual coefficients solved for during IRLS fitting.
    pub alpha: Array1<f64>,
    /// Cholesky factor of the regularized kernel matrix.
    pub cholesky: Array2<f64>,
    /// Log-likelihood reached when the IRLS loop stopped.
    pub log_likelihood: f64,
    /// Final per-sample IRLS weights (small values mark suspect points).
    pub outlier_weights: Array1<f64>,
    /// Indices of training samples flagged as outliers.
    pub outlier_indices: Vec<usize>,
    /// Robustness diagnostics computed from the final residuals/weights.
    pub robustness_metrics: RobustnessMetrics,
}
62
/// Observation likelihoods with varying degrees of robustness to outliers.
#[derive(Debug, Clone)]
pub enum RobustLikelihood {
    /// Standard Gaussian likelihood (not robust; breakdown point 0).
    Gaussian,
    /// Student-t likelihood; smaller `degrees_of_freedom` means heavier tails.
    StudentT { degrees_of_freedom: f64 },
    /// Laplace (double-exponential) likelihood with the given scale.
    Laplace { scale: f64 },
    /// Huber likelihood: quadratic within `threshold`, linear beyond it.
    Huber { threshold: f64 },
    /// Cauchy likelihood with the given scale (very heavy tails).
    Cauchy { scale: f64 },
    /// Two-component Gaussian mixture: a clean component plus a wide
    /// contamination component occurring with `contamination_probability`.
    ContaminationMixture {
        clean_variance: f64,
        contamination_variance: f64,
        contamination_probability: f64,
    },
    /// Wrapper that delegates to `base_likelihood`.
    /// NOTE(review): `adaptation_rate` is not read by any method in this
    /// file — confirm the intended adaptation behavior.
    Adaptive {
        base_likelihood: Box<RobustLikelihood>,
        adaptation_rate: f64,
    },
}
88
89impl RobustLikelihood {
90 pub fn student_t(degrees_of_freedom: f64) -> Self {
92 Self::StudentT { degrees_of_freedom }
93 }
94
95 pub fn laplace(scale: f64) -> Self {
97 Self::Laplace { scale }
98 }
99
100 pub fn huber(threshold: f64) -> Self {
102 Self::Huber { threshold }
103 }
104
105 pub fn contamination_mixture(clean_var: f64, contam_var: f64, contam_prob: f64) -> Self {
107 Self::ContaminationMixture {
108 clean_variance: clean_var,
109 contamination_variance: contam_var,
110 contamination_probability: contam_prob,
111 }
112 }
113
114 pub fn log_likelihood(&self, residual: f64) -> f64 {
116 match self {
117 Self::Gaussian => -0.5 * (residual.powi(2) + (2.0 * PI).ln()),
118 Self::StudentT { degrees_of_freedom } => {
119 let nu = *degrees_of_freedom;
120 let gamma_ratio = Self::log_gamma((nu + 1.0) / 2.0) - Self::log_gamma(nu / 2.0);
121 gamma_ratio
122 - 0.5 * (nu * PI).ln()
123 - 0.5 * (nu + 1.0) * (1.0 + residual.powi(2) / nu).ln()
124 }
125 Self::Laplace { scale } => -(residual.abs() / scale + scale.ln() + 2.0_f64.ln()),
126 Self::Huber { threshold } => {
127 let abs_res = residual.abs();
128 if abs_res <= *threshold {
129 -0.5 * residual.powi(2) } else {
131 -threshold * abs_res + 0.5 * threshold.powi(2) }
133 }
134 Self::Cauchy { scale } => -(PI * scale * (1.0 + (residual / scale).powi(2))).ln(),
135 Self::ContaminationMixture {
136 clean_variance,
137 contamination_variance,
138 contamination_probability,
139 } => {
140 let clean_ll = -0.5
141 * (residual.powi(2) / clean_variance + clean_variance.ln() + (2.0 * PI).ln());
142 let contam_ll = -0.5
143 * (residual.powi(2) / contamination_variance
144 + contamination_variance.ln()
145 + (2.0 * PI).ln());
146
147 let clean_prob = 1.0 - contamination_probability;
148 let clean_weight = clean_prob * clean_ll.exp();
149 let contam_weight = contamination_probability * contam_ll.exp();
150
151 (clean_weight + contam_weight).ln()
152 }
153 Self::Adaptive {
154 base_likelihood, ..
155 } => base_likelihood.log_likelihood(residual),
156 }
157 }
158
159 pub fn log_likelihood_derivative(&self, residual: f64) -> f64 {
161 match self {
162 Self::Gaussian => -residual,
163 Self::StudentT { degrees_of_freedom } => {
164 let nu = *degrees_of_freedom;
165 -(nu + 1.0) * residual / (nu + residual.powi(2))
166 }
167 Self::Laplace { scale } => -residual.signum() / scale,
168 Self::Huber { threshold } => {
169 let abs_res = residual.abs();
170 if abs_res <= *threshold {
171 -residual
172 } else {
173 -threshold * residual.signum()
174 }
175 }
176 Self::Cauchy { scale } => -2.0 * residual / (scale.powi(2) + residual.powi(2)),
177 Self::ContaminationMixture { .. } => {
178 -residual }
181 Self::Adaptive {
182 base_likelihood, ..
183 } => base_likelihood.log_likelihood_derivative(residual),
184 }
185 }
186
187 pub fn compute_weights(&self, residuals: &Array1<f64>) -> Array1<f64> {
189 match self {
190 Self::Gaussian => Array1::ones(residuals.len()),
191 Self::StudentT { degrees_of_freedom } => {
192 let nu = *degrees_of_freedom;
193 residuals.map(|&r| (nu + 1.0) / (nu + r.powi(2)))
194 }
195 Self::Laplace { .. } => {
196 residuals.map(|&r| if r.abs() > 1e-12 { 1.0 / r.abs() } else { 1e12 })
197 }
198 Self::Huber { threshold } => residuals.map(|&r| {
199 let abs_r = r.abs();
200 if abs_r <= *threshold {
201 1.0
202 } else {
203 threshold / abs_r
204 }
205 }),
206 Self::Cauchy { scale } => residuals.map(|&r| 2.0 / (1.0 + (r / scale).powi(2))),
207 Self::ContaminationMixture {
208 clean_variance,
209 contamination_variance,
210 contamination_probability,
211 } => {
212 residuals.map(|&r| {
214 let clean_ll = (-0.5 * r.powi(2) / clean_variance).exp();
215 let contam_ll = (-0.5 * r.powi(2) / contamination_variance).exp();
216
217 let clean_prob = 1.0 - contamination_probability;
218 let clean_weight = clean_prob * clean_ll;
219 let contam_weight = contamination_probability * contam_ll;
220
221 clean_weight / (clean_weight + contam_weight)
222 })
223 }
224 Self::Adaptive {
225 base_likelihood, ..
226 } => base_likelihood.compute_weights(residuals),
227 }
228 }
229
230 fn log_gamma(x: f64) -> f64 {
232 if x < 1.0 {
233 return Self::log_gamma(x + 1.0) - x.ln();
234 }
235 (x - 0.5) * x.ln() - x + 0.5 * (2.0 * PI).ln()
237 }
238
239 pub fn breakdown_point(&self) -> f64 {
241 match self {
242 Self::Gaussian => 0.0, Self::StudentT { degrees_of_freedom } => {
244 1.0 / (1.0 + degrees_of_freedom)
246 }
247 Self::Laplace { .. } => 0.5, Self::Huber { .. } => 0.5, Self::Cauchy { .. } => 0.5, Self::ContaminationMixture {
251 contamination_probability,
252 ..
253 } => contamination_probability.min(0.5),
254 Self::Adaptive {
255 base_likelihood, ..
256 } => base_likelihood.breakdown_point(),
257 }
258 }
259}
260
/// Diagnostics describing how robust the fitted model is to contamination.
#[derive(Debug, Clone)]
pub struct RobustnessMetrics {
    /// Theoretical breakdown point of the likelihood (fraction of outliers tolerated).
    pub breakdown_point: f64,
    /// Spread (max − min) of the final IRLS weights.
    pub influence_function_bound: f64,
    /// Largest observed weight (bounds the impact of one gross error).
    pub gross_error_sensitivity: f64,
    /// Mean absolute difference between consecutive weights.
    pub local_shift_sensitivity: f64,
    /// Fraction of samples whose weight fell below 0.5.
    pub contamination_estimate: f64,
}
270
271impl RobustnessMetrics {
272 pub fn compute(
274 residuals: &Array1<f64>,
275 weights: &Array1<f64>,
276 likelihood: &RobustLikelihood,
277 ) -> Self {
278 let n = residuals.len() as f64;
279
280 let breakdown_point = likelihood.breakdown_point();
282
283 let weight_range = weights
285 .iter()
286 .fold((f64::INFINITY, 0.0_f64), |(min, max), &w| {
287 (min.min(w), max.max(w))
288 });
289 let influence_function_bound = weight_range.1 - weight_range.0;
290
291 let gross_error_sensitivity = weight_range.1;
293
294 let mut local_shift_sum = 0.0;
296 for i in 1..residuals.len() {
297 let weight_diff = (weights[i] - weights[i - 1]).abs();
298 local_shift_sum += weight_diff;
299 }
300 let local_shift_sensitivity = local_shift_sum / (n - 1.0);
301
302 let low_weight_threshold = 0.5; let contaminated_count = weights
305 .iter()
306 .filter(|&&w| w < low_weight_threshold)
307 .count();
308 let contamination_estimate = contaminated_count as f64 / n;
309
310 Self {
311 breakdown_point,
312 influence_function_bound,
313 gross_error_sensitivity,
314 local_shift_sensitivity,
315 contamination_estimate,
316 }
317 }
318}
319
/// Strategies for flagging outlying training samples after fitting.
#[derive(Debug, Clone)]
pub enum OutlierDetectionMethod {
    /// |residual − mean| / std exceeding `threshold`.
    StandardizedResiduals { threshold: f64 },
    /// One-dimensional Mahalanobis distance of the residual exceeding `threshold`.
    MahalanobisDistance { threshold: f64 },
    /// Absolute residual exceeding `threshold` directly.
    InfluenceFunction { threshold: f64 },
    /// Cook's-distance-style combination of residual and leverage.
    CooksDistance { threshold: f64 },
    /// Leverage (squared standardized prediction) exceeding `threshold`.
    Leverage { threshold: f64 },
    /// Median/MAD-based robust distance exceeding `threshold`.
    RobustMahalanobis { threshold: f64 },
}
336
337impl OutlierDetectionMethod {
338 pub fn detect_outliers(
340 &self,
341 residuals: &Array1<f64>,
342 predictions: &Array1<f64>,
343 _training_data: &(Array2<f64>, Array1<f64>),
344 ) -> Vec<usize> {
345 match self {
346 Self::StandardizedResiduals { threshold } => {
347 let std_dev = residuals.std(0.0).max(1e-8); let mean_residual = residuals.mean().unwrap_or(0.0);
349 residuals
350 .iter()
351 .enumerate()
352 .filter(|(_, &r)| {
353 let standardized = (r - mean_residual).abs() / std_dev;
354 standardized > *threshold
355 })
356 .map(|(i, _)| i)
357 .collect()
358 }
359 Self::MahalanobisDistance { threshold } => {
360 let mean_residual = residuals.mean().unwrap_or(0.0);
362 let variance = residuals.var(0.0);
363 residuals
364 .iter()
365 .enumerate()
366 .filter(|(_, &r)| {
367 let maha_dist = (r - mean_residual).powi(2) / variance;
368 maha_dist > threshold.powi(2)
369 })
370 .map(|(i, _)| i)
371 .collect()
372 }
373 Self::InfluenceFunction { threshold } => {
374 residuals
376 .iter()
377 .enumerate()
378 .filter(|(_, &r)| r.abs() > *threshold)
379 .map(|(i, _)| i)
380 .collect()
381 }
382 Self::CooksDistance { threshold } => {
383 let mean_pred = predictions.mean().unwrap_or(0.0);
385 let pred_var = predictions.var(0.0);
386
387 residuals
388 .iter()
389 .zip(predictions.iter())
390 .enumerate()
391 .filter(|(_, (&r, &p))| {
392 let leverage = (p - mean_pred).powi(2) / pred_var;
393 let cooks_d = r.powi(2) * leverage / (1.0 - leverage + 1e-12);
394 cooks_d > *threshold
395 })
396 .map(|(i, _)| i)
397 .collect()
398 }
399 Self::Leverage { threshold } => {
400 let mean_pred = predictions.mean().unwrap_or(0.0);
402 let pred_var = predictions.var(0.0);
403
404 predictions
405 .iter()
406 .enumerate()
407 .filter(|(_, &p)| {
408 let leverage = (p - mean_pred).powi(2) / (pred_var + 1e-12);
409 leverage > *threshold
410 })
411 .map(|(i, _)| i)
412 .collect()
413 }
414 Self::RobustMahalanobis { threshold } => {
415 let mut sorted_residuals = residuals.to_vec();
417 sorted_residuals
418 .sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
419
420 let n = sorted_residuals.len();
421 let median = if n % 2 == 0 {
422 (sorted_residuals[n / 2 - 1] + sorted_residuals[n / 2]) / 2.0
423 } else {
424 sorted_residuals[n / 2]
425 };
426
427 let mut deviations: Vec<f64> =
429 residuals.iter().map(|&r| (r - median).abs()).collect();
430 deviations.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
431
432 let mad = if n % 2 == 0 {
433 (deviations[n / 2 - 1] + deviations[n / 2]) / 2.0
434 } else {
435 deviations[n / 2]
436 };
437
438 let robust_scale = 1.4826 * mad; residuals
441 .iter()
442 .enumerate()
443 .filter(|(_, &r)| {
444 let robust_maha = (r - median).abs() / (robust_scale + 1e-12);
445 robust_maha > *threshold
446 })
447 .map(|(i, _)| i)
448 .collect()
449 }
450 }
451 }
452}
453
/// Gaussian-process regressor with robust (heavy-tailed) likelihoods and
/// outlier detection, parameterized by the typestate `S`
/// ([`Untrained`] or [`Trained`]).
#[derive(Debug, Clone)]
pub struct RobustGaussianProcessRegressor<S = Untrained> {
    /// Covariance kernel; must be set before fitting.
    kernel: Option<Box<dyn Kernel>>,
    /// Observation likelihood used during IRLS fitting.
    robust_likelihood: RobustLikelihood,
    /// Strategy for flagging outliers after fitting.
    outlier_detection_method: OutlierDetectionMethod,
    /// Threshold reused when refitting during cross-validation.
    outlier_detection_threshold: f64,
    /// Maximum number of IRLS refinement iterations.
    max_iterations: usize,
    /// Absolute log-likelihood change at which IRLS stops.
    convergence_threshold: f64,
    /// Diagonal jitter added to the kernel matrix for stability.
    alpha: f64,
    /// Typestate payload (unit when untrained, fitted state when trained).
    _state: S,
}
466
/// Configuration bundle for the robust GP (returned by the `Estimator` impls).
#[derive(Debug, Clone)]
pub struct RobustGPConfig {
    /// Observation likelihood.
    pub likelihood: RobustLikelihood,
    /// Outlier detection threshold.
    pub outlier_threshold: f64,
    /// Maximum IRLS iterations.
    pub max_iterations: usize,
    /// Convergence tolerance on the log-likelihood.
    pub convergence_threshold: f64,
    /// Diagonal jitter (ridge) added for numerical stability.
    pub regularization: f64,
}
476
477impl Default for RobustGPConfig {
478 fn default() -> Self {
479 Self {
480 likelihood: RobustLikelihood::StudentT {
481 degrees_of_freedom: 3.0,
482 },
483 outlier_threshold: 2.5,
484 max_iterations: 100,
485 convergence_threshold: 1e-6,
486 regularization: 1e-6,
487 }
488 }
489}
490
491impl Default for RobustGaussianProcessRegressor<Untrained> {
492 fn default() -> Self {
493 Self::new()
494 }
495}
496
497impl RobustGaussianProcessRegressor<Untrained> {
498 pub fn new() -> Self {
500 Self {
501 kernel: None,
502 robust_likelihood: RobustLikelihood::StudentT {
503 degrees_of_freedom: 3.0,
504 },
505 outlier_detection_method: OutlierDetectionMethod::StandardizedResiduals {
506 threshold: 2.5,
507 },
508 outlier_detection_threshold: 2.5,
509 max_iterations: 100,
510 convergence_threshold: 1e-6,
511 alpha: 1e-6,
512 _state: Untrained,
513 }
514 }
515
516 pub fn builder() -> RobustGPBuilder {
518 RobustGPBuilder::new()
519 }
520
521 pub fn kernel(mut self, kernel: Box<dyn Kernel>) -> Self {
523 self.kernel = Some(kernel);
524 self
525 }
526
527 pub fn robust_likelihood(mut self, likelihood: RobustLikelihood) -> Self {
529 self.robust_likelihood = likelihood;
530 self
531 }
532
533 pub fn outlier_detection_threshold(mut self, threshold: f64) -> Self {
535 self.outlier_detection_threshold = threshold;
536 self
537 }
538
539 pub fn max_iterations(mut self, max_iter: usize) -> Self {
541 self.max_iterations = max_iter;
542 self
543 }
544
545 pub fn alpha(mut self, alpha: f64) -> Self {
547 self.alpha = alpha;
548 self
549 }
550}
551
/// Fluent builder for [`RobustGaussianProcessRegressor`].
#[derive(Debug, Clone)]
pub struct RobustGPBuilder {
    // Fields mirror RobustGaussianProcessRegressor; see `build`.
    kernel: Option<Box<dyn Kernel>>,
    likelihood: RobustLikelihood,
    outlier_method: OutlierDetectionMethod,
    outlier_threshold: f64,
    max_iterations: usize,
    convergence_threshold: f64,
    alpha: f64,
}
563
564impl Default for RobustGPBuilder {
565 fn default() -> Self {
566 Self::new()
567 }
568}
569
570impl RobustGPBuilder {
571 pub fn new() -> Self {
572 Self {
573 kernel: None,
574 likelihood: RobustLikelihood::StudentT {
575 degrees_of_freedom: 3.0,
576 },
577 outlier_method: OutlierDetectionMethod::StandardizedResiduals { threshold: 2.5 },
578 outlier_threshold: 2.5,
579 max_iterations: 100,
580 convergence_threshold: 1e-6,
581 alpha: 1e-6,
582 }
583 }
584
585 pub fn kernel(mut self, kernel: Box<dyn Kernel>) -> Self {
586 self.kernel = Some(kernel);
587 self
588 }
589
590 pub fn robust_likelihood(mut self, likelihood: RobustLikelihood) -> Self {
591 self.likelihood = likelihood;
592 self
593 }
594
595 pub fn outlier_detection_method(mut self, method: OutlierDetectionMethod) -> Self {
596 self.outlier_method = method;
597 self
598 }
599
600 pub fn outlier_detection_threshold(mut self, threshold: f64) -> Self {
601 self.outlier_threshold = threshold;
602 self
603 }
604
605 pub fn max_iterations(mut self, max_iter: usize) -> Self {
606 self.max_iterations = max_iter;
607 self
608 }
609
610 pub fn convergence_threshold(mut self, threshold: f64) -> Self {
611 self.convergence_threshold = threshold;
612 self
613 }
614
615 pub fn alpha(mut self, alpha: f64) -> Self {
616 self.alpha = alpha;
617 self
618 }
619
620 pub fn build(self) -> RobustGaussianProcessRegressor<Untrained> {
621 RobustGaussianProcessRegressor {
622 kernel: self.kernel,
623 robust_likelihood: self.likelihood,
624 outlier_detection_method: self.outlier_method,
625 outlier_detection_threshold: self.outlier_threshold,
626 max_iterations: self.max_iterations,
627 convergence_threshold: self.convergence_threshold,
628 alpha: self.alpha,
629 _state: Untrained,
630 }
631 }
632}
633
impl Estimator for RobustGaussianProcessRegressor<Untrained> {
    type Config = RobustGPConfig;
    type Error = SklearsError;
    type Float = f64;

    /// Return the estimator configuration.
    ///
    /// NOTE(review): this always returns a compiled-in static default and
    /// ignores the instance's actual settings (likelihood, thresholds, etc.)
    /// — confirm whether the `Estimator` trait contract expects the live
    /// configuration here.
    fn config(&self) -> &Self::Config {
        static DEFAULT_CONFIG: RobustGPConfig = RobustGPConfig {
            likelihood: RobustLikelihood::StudentT {
                degrees_of_freedom: 3.0,
            },
            outlier_threshold: 2.5,
            max_iterations: 100,
            convergence_threshold: 1e-6,
            regularization: 1e-6,
        };
        &DEFAULT_CONFIG
    }
}
652
impl Estimator for RobustGaussianProcessRegressor<Trained> {
    type Config = RobustGPConfig;
    type Error = SklearsError;
    type Float = f64;

    /// Return the estimator configuration.
    ///
    /// NOTE(review): like the untrained impl, this returns a static default
    /// rather than the configuration the model was actually fitted with —
    /// confirm against the `Estimator` trait contract.
    fn config(&self) -> &Self::Config {
        static DEFAULT_CONFIG: RobustGPConfig = RobustGPConfig {
            likelihood: RobustLikelihood::StudentT {
                degrees_of_freedom: 3.0,
            },
            outlier_threshold: 2.5,
            max_iterations: 100,
            convergence_threshold: 1e-6,
            regularization: 1e-6,
        };
        &DEFAULT_CONFIG
    }
}
671
impl Fit<Array2<f64>, Array1<f64>> for RobustGaussianProcessRegressor<Untrained> {
    type Fitted = RobustGaussianProcessRegressor<Trained>;

    /// Fit the robust GP via iteratively reweighted least squares (IRLS):
    /// solve the (regularized) kernel system, compute residuals, derive
    /// per-sample weights from the robust likelihood, re-solve on the
    /// weighted system, and repeat until the summed log-likelihood changes
    /// by less than `convergence_threshold` or `max_iterations` is hit.
    ///
    /// # Errors
    /// Returns `DimensionMismatch` if `X` and `y` lengths disagree,
    /// `InvalidInput` if no kernel was configured, and propagates failures
    /// from the kernel/Cholesky utilities.
    fn fit(self, X: &Array2<f64>, y: &Array1<f64>) -> SklResult<Self::Fitted> {
        if X.nrows() != y.len() {
            return Err(SklearsError::DimensionMismatch {
                expected: X.nrows(),
                actual: y.len(),
            });
        }

        let kernel = self
            .kernel
            .clone()
            .ok_or_else(|| SklearsError::InvalidInput("Kernel must be specified".to_string()))?;

        let X_owned = X.to_owned();
        let y_owned = y.to_owned();

        // K is kept unregularized for predictions; K_reg adds jitter on the
        // diagonal so the Cholesky factorization succeeds.
        let K = kernel.compute_kernel_matrix(&X_owned, None)?;
        let mut K_reg = K.clone();
        for i in 0..K_reg.nrows() {
            K_reg[[i, i]] += self.alpha;
        }

        // NOTE(review): alpha is obtained from a single triangular solve on
        // the Cholesky factor; a full L L^T solve is usually required here —
        // confirm the semantics of `utils::triangular_solve`.
        let chol_decomp = utils::robust_cholesky(&K_reg)?;
        let mut alpha = utils::triangular_solve(&chol_decomp, &y_owned)?;

        let mut weights = Array1::ones(y_owned.len());
        let mut prev_log_likelihood = f64::NEG_INFINITY;

        for _iteration in 0..self.max_iterations {
            let predictions = K.dot(&alpha);
            let residuals = &y_owned - &predictions;

            // Down-weight samples with large residuals per the likelihood.
            weights = self.robust_likelihood.compute_weights(&residuals);

            // Symmetrically scale K by sqrt(w_i * w_j) and re-add the jitter.
            let mut K_weighted = K.clone();
            for i in 0..K_weighted.nrows() {
                for j in 0..K_weighted.ncols() {
                    K_weighted[[i, j]] *= (weights[i] * weights[j]).sqrt();
                }
                K_weighted[[i, i]] += self.alpha;
            }

            let y_weighted = &y_owned * &weights;

            // Best effort: if the weighted system cannot be factorized or
            // solved, keep the previous alpha rather than aborting the fit.
            if let Ok(chol_weighted) = utils::robust_cholesky(&K_weighted) {
                if let Ok(alpha_new) = utils::triangular_solve(&chol_weighted, &y_weighted) {
                    alpha = alpha_new;
                }
            }

            let log_likelihood = residuals
                .iter()
                .map(|&r| self.robust_likelihood.log_likelihood(r))
                .sum::<f64>();

            // Stop once the objective change is below the tolerance.
            // NOTE(review): if this triggers on the very first iteration,
            // `prev_log_likelihood` (and hence the stored log-likelihood)
            // remains -inf — confirm whether the final value should be
            // recorded instead.
            if (log_likelihood - prev_log_likelihood).abs() < self.convergence_threshold {
                break;
            }
            prev_log_likelihood = log_likelihood;
        }

        let final_predictions = K.dot(&alpha);
        let final_residuals = &y_owned - &final_predictions;

        let outlier_indices = self.outlier_detection_method.detect_outliers(
            &final_residuals,
            &final_predictions,
            &(X_owned.clone(), y_owned.clone()),
        );

        let robustness_metrics =
            RobustnessMetrics::compute(&final_residuals, &weights, &self.robust_likelihood);

        // The kernel is stored both in the outer struct (as Option) and in
        // the trained state (unwrapped clone), matching the struct layout.
        Ok(RobustGaussianProcessRegressor {
            kernel: self.kernel,
            robust_likelihood: self.robust_likelihood.clone(),
            outlier_detection_method: self.outlier_detection_method,
            outlier_detection_threshold: self.outlier_detection_threshold,
            max_iterations: self.max_iterations,
            convergence_threshold: self.convergence_threshold,
            alpha: self.alpha,
            _state: Trained {
                kernel,
                robust_likelihood: self.robust_likelihood,
                training_data: (X_owned, y_owned),
                alpha,
                cholesky: chol_decomp,
                log_likelihood: prev_log_likelihood,
                outlier_weights: weights,
                outlier_indices,
                robustness_metrics,
            },
        })
    }
}
782
783impl RobustGaussianProcessRegressor<Trained> {
784 pub fn trained_state(&self) -> &Trained {
786 &self._state
787 }
788
789 pub fn outlier_indices(&self) -> &[usize] {
791 &self._state.outlier_indices
792 }
793
794 pub fn outlier_weights(&self) -> &Array1<f64> {
796 &self._state.outlier_weights
797 }
798
799 pub fn robustness_metrics(&self) -> &RobustnessMetrics {
801 &self._state.robustness_metrics
802 }
803
804 pub fn predict_with_robust_uncertainty(
806 &self,
807 X: &Array2<f64>,
808 ) -> SklResult<(Array1<f64>, Array1<f64>)> {
809 let K_star = self
810 ._state
811 .kernel
812 .compute_kernel_matrix(&self._state.training_data.0, Some(X))?;
813 let predictions = K_star.t().dot(&self._state.alpha);
814
815 let K_star_star = self._state.kernel.compute_kernel_matrix(X, None)?;
817 let n_test = X.nrows();
819 let mut v_squared_sum = Array1::<f64>::zeros(n_test);
820 for i in 0..n_test {
821 let k_star_i = K_star.column(i).to_owned();
822 let v_i = utils::triangular_solve(&self._state.cholesky, &k_star_i)?;
823 v_squared_sum[i] = v_i.iter().map(|x| x.powi(2)).sum();
824 }
825 let base_variance = K_star_star.diag().to_owned() - &v_squared_sum;
826
827 let uncertainty_factor = match self._state.robust_likelihood {
829 RobustLikelihood::StudentT { degrees_of_freedom } => {
830 degrees_of_freedom / (degrees_of_freedom - 2.0).max(1.0)
832 }
833 RobustLikelihood::Laplace { .. } => 2.0, RobustLikelihood::Cauchy { .. } => 10.0, _ => 1.0,
836 };
837
838 let robust_uncertainties = base_variance.map(|x| (x * uncertainty_factor).max(0.0).sqrt());
839
840 Ok((predictions, robust_uncertainties))
841 }
842
843 pub fn assess_contamination(&self) -> f64 {
845 self._state.robustness_metrics.contamination_estimate
846 }
847
848 pub fn compute_influence_function(&self) -> Array1<f64> {
850 let residuals = &self._state.training_data.1
852 - &self
853 ._state
854 .kernel
855 .compute_kernel_matrix(&self._state.training_data.0, None)
856 .unwrap()
857 .dot(&self._state.alpha);
858
859 residuals
860 .iter()
861 .zip(self._state.outlier_weights.iter())
862 .map(|(&r, &w)| {
863 let clamped_w = w.clamp(0.0, 1.0);
865 r.abs() * (1.0 - clamped_w)
866 })
867 .collect()
868 }
869
870 pub fn robust_cross_validation(&self, folds: usize) -> SklResult<f64> {
872 let n = self._state.training_data.0.nrows();
873 let fold_size = n / folds;
874 let mut cv_scores = Vec::new();
875
876 for fold in 0..folds {
877 let start_idx = fold * fold_size;
878 let end_idx = if fold == folds - 1 {
879 n
880 } else {
881 (fold + 1) * fold_size
882 };
883
884 let mut train_indices = Vec::new();
886 let mut test_indices = Vec::new();
887
888 for i in 0..n {
889 if i >= start_idx && i < end_idx {
890 test_indices.push(i);
891 } else {
892 train_indices.push(i);
893 }
894 }
895
896 let X_train = self._state.training_data.0.select(Axis(0), &train_indices);
898 let y_train = self._state.training_data.1.select(Axis(0), &train_indices);
899 let X_test = self._state.training_data.0.select(Axis(0), &test_indices);
900 let y_test = self._state.training_data.1.select(Axis(0), &test_indices);
901
902 let fold_gp = RobustGaussianProcessRegressor::builder()
904 .kernel(self._state.kernel.clone_box())
905 .robust_likelihood(self._state.robust_likelihood.clone())
906 .outlier_detection_threshold(self.outlier_detection_threshold)
907 .max_iterations(self.max_iterations)
908 .alpha(self.alpha)
909 .build();
910
911 if let Ok(fitted) = fold_gp.fit(&X_train, &y_train) {
912 if let Ok(pred) = fitted.predict(&X_test) {
913 let mut errors: Vec<f64> = pred
915 .iter()
916 .zip(y_test.iter())
917 .map(|(&p, &y)| (p - y).abs())
918 .collect();
919
920 errors.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
921 let median_error = if errors.len() % 2 == 0 {
922 (errors[errors.len() / 2 - 1] + errors[errors.len() / 2]) / 2.0
923 } else {
924 errors[errors.len() / 2]
925 };
926
927 cv_scores.push(median_error);
928 }
929 }
930 }
931
932 if cv_scores.is_empty() {
933 return Err(SklearsError::InvalidInput(
934 "Cross-validation failed".to_string(),
935 ));
936 }
937
938 cv_scores.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
940 let median_cv_score = if cv_scores.len() % 2 == 0 {
941 (cv_scores[cv_scores.len() / 2 - 1] + cv_scores[cv_scores.len() / 2]) / 2.0
942 } else {
943 cv_scores[cv_scores.len() / 2]
944 };
945
946 Ok(median_cv_score)
947 }
948}
949
950impl Predict<Array2<f64>, Array1<f64>> for RobustGaussianProcessRegressor<Trained> {
951 fn predict(&self, X: &Array2<f64>) -> SklResult<Array1<f64>> {
952 let (predictions, _) = self.predict_with_robust_uncertainty(X)?;
953 Ok(predictions)
954 }
955}
956
#[allow(non_snake_case)]
#[cfg(test)]
mod tests {
    use super::*;
    use crate::kernels::RBF;
    use scirs2_core::ndarray::array;

    /// Student-t log-density must be unimodal at zero and finite.
    #[test]
    fn test_robust_likelihood_student_t() {
        let likelihood = RobustLikelihood::student_t(3.0);

        let ll_0 = likelihood.log_likelihood(0.0);
        let ll_1 = likelihood.log_likelihood(1.0);
        let ll_2 = likelihood.log_likelihood(2.0);

        assert!(ll_0 > ll_1);
        assert!(ll_1 > ll_2);
        assert!(ll_0.is_finite());
    }

    /// IRLS weights should decrease with |residual| and stay positive.
    #[test]
    fn test_robust_likelihood_weights() {
        let likelihood = RobustLikelihood::student_t(3.0);
        let residuals = array![0.0, 1.0, 2.0, 5.0, 10.0];

        let weights = likelihood.compute_weights(&residuals);

        assert!(weights[0] > weights[1]);
        assert!(weights[1] > weights[2]);
        assert!(weights[2] > weights[3]);
        assert!(weights[3] > weights[4]);

        assert!(weights.iter().all(|&w| w > 0.0));
    }

    /// End-to-end fit/predict on data with one gross outlier (y[2] = 10).
    #[test]
    fn test_robust_gp_fit_predict() {
        let X = array![[1.0], [2.0], [3.0], [4.0], [5.0]];
        let y = array![1.0, 2.0, 10.0, 4.0, 5.0];
        let robust_gp = RobustGaussianProcessRegressor::builder()
            .kernel(Box::new(RBF::new(1.0)))
            .robust_likelihood(RobustLikelihood::student_t(3.0))
            .outlier_detection_threshold(2.5)
            .max_iterations(10)
            .build();

        let trained = robust_gp.fit(&X, &y).unwrap();
        let predictions = trained.predict(&X).unwrap();

        assert_eq!(predictions.len(), X.nrows());
    }

    /// Outlier indices must be valid positions into the training set.
    #[test]
    fn test_outlier_detection() {
        let X = array![[1.0], [2.0], [3.0], [4.0], [5.0]];
        let y = array![1.0, 2.0, 10.0, 4.0, 5.0];
        let robust_gp = RobustGaussianProcessRegressor::builder()
            .kernel(Box::new(RBF::new(1.0)))
            .robust_likelihood(RobustLikelihood::student_t(3.0))
            .outlier_detection_threshold(1.5)
            .build();

        let trained = robust_gp.fit(&X, &y).unwrap();
        let outliers = trained.outlier_indices();

        assert!(outliers.len() <= y.len());
        // Every reported index must refer to a real training sample.
        assert!(outliers.iter().all(|&i| i < y.len()));
    }

    /// Laplace weights must stay finite and positive, including at r = 0
    /// (where the 1/|r| weight is capped).
    #[test]
    fn test_laplace_likelihood() {
        let likelihood = RobustLikelihood::laplace(1.0);
        let residuals = array![0.0, 1.0, 2.0];

        let weights = likelihood.compute_weights(&residuals);

        assert!(weights.iter().all(|&w| w.is_finite() && w > 0.0));
    }

    /// Huber weights are flat inside the threshold and shrink beyond it.
    #[test]
    fn test_huber_likelihood() {
        let likelihood = RobustLikelihood::huber(1.5);

        let small_residual = 1.0; // within the threshold
        let large_residual = 3.0; // beyond the threshold
        let small_weight = likelihood.compute_weights(&array![small_residual])[0];
        let large_weight = likelihood.compute_weights(&array![large_residual])[0];

        assert!(small_weight >= large_weight);
    }

    /// Mixture responsibilities are probabilities and favor small residuals.
    #[test]
    fn test_contamination_mixture_likelihood() {
        let likelihood = RobustLikelihood::contamination_mixture(1.0, 10.0, 0.1);
        let residuals = array![0.5, 5.0, 0.2];
        let weights = likelihood.compute_weights(&residuals);

        assert!(weights.iter().all(|&w| w >= 0.0 && w <= 1.0));

        // The small residual (0.2) must look "cleaner" than the large one (5.0).
        assert!(weights[2] > weights[1]);
    }

    /// Metrics should register the down-weighted third sample as contamination.
    #[test]
    fn test_robustness_metrics() {
        let residuals = array![0.1, 0.2, 5.0, 0.15, 0.3];
        let weights = array![1.0, 1.0, 0.1, 1.0, 1.0];
        let likelihood = RobustLikelihood::student_t(3.0);

        let metrics = RobustnessMetrics::compute(&residuals, &weights, &likelihood);

        assert!(metrics.breakdown_point > 0.0);
        assert!(metrics.contamination_estimate > 0.0);
        assert!(metrics.gross_error_sensitivity > 0.0);
    }

    /// Robust uncertainties must be non-negative and match the query size.
    #[test]
    fn test_robust_uncertainty() {
        let X = array![[1.0], [2.0], [3.0], [4.0]];
        let y = array![1.0, 2.0, 3.0, 4.0];

        let robust_gp = RobustGaussianProcessRegressor::builder()
            .kernel(Box::new(RBF::new(1.0)))
            .robust_likelihood(RobustLikelihood::student_t(3.0))
            .build();

        let trained = robust_gp.fit(&X, &y).unwrap();
        let (predictions, uncertainties) = trained.predict_with_robust_uncertainty(&X).unwrap();

        assert_eq!(predictions.len(), X.nrows());
        assert_eq!(uncertainties.len(), X.nrows());
        assert!(uncertainties.iter().all(|&u| u >= 0.0));
    }

    /// Influence values are non-negative and finite.
    #[test]
    fn test_influence_function() {
        let X = array![[1.0], [2.0], [3.0], [4.0]];
        let y = array![1.0, 2.0, 10.0, 4.0]; // y[2] is an outlier
        let robust_gp = RobustGaussianProcessRegressor::builder()
            .kernel(Box::new(RBF::new(1.0)))
            .robust_likelihood(RobustLikelihood::student_t(3.0))
            .build();

        let trained = robust_gp.fit(&X, &y).unwrap();
        let influence = trained.compute_influence_function();

        assert_eq!(influence.len(), X.nrows());
        for (i, &inf) in influence.iter().enumerate() {
            assert!(inf >= 0.0, "Influence at index {} is negative: {}", i, inf);
        }

        let total_influence: f64 = influence.iter().sum();
        assert!(
            total_influence >= 0.0,
            "Total influence should be non-negative"
        );

        assert!(influence.iter().any(|&inf| inf.is_finite()));
    }

    /// Breakdown points follow the documented per-likelihood values.
    #[test]
    fn test_breakdown_points() {
        let gaussian = RobustLikelihood::Gaussian;
        let student_t = RobustLikelihood::student_t(3.0);
        let laplace = RobustLikelihood::laplace(1.0);

        assert_eq!(gaussian.breakdown_point(), 0.0);
        assert!(student_t.breakdown_point() > 0.0);
        assert_eq!(laplace.breakdown_point(), 0.5);
    }

    /// Each detection method must return in-range indices.
    #[test]
    fn test_outlier_detection_methods() {
        let residuals = array![0.1, 0.2, 5.0, 0.15];
        let predictions = array![1.0, 2.0, 3.0, 4.0];
        let training_data = (
            array![[1.0], [2.0], [3.0], [4.0]],
            array![1.1, 2.2, 8.0, 4.15], // third target is outlying
        );

        let methods = [
            OutlierDetectionMethod::StandardizedResiduals { threshold: 2.0 },
            OutlierDetectionMethod::MahalanobisDistance { threshold: 2.0 },
            OutlierDetectionMethod::Leverage { threshold: 2.0 },
        ];

        for method in &methods {
            let outliers = method.detect_outliers(&residuals, &predictions, &training_data);
            // The previous `!outliers.is_empty() || true` assertion was
            // vacuously true; instead check that all indices are in range.
            assert!(outliers.iter().all(|&i| i < residuals.len()));
        }
    }

    /// Cross-validation on 6 samples with 3 folds yields a non-negative score.
    #[test]
    fn test_robust_cross_validation() {
        let X = array![[1.0], [2.0], [3.0], [4.0], [5.0], [6.0]];
        let y = array![1.0, 2.0, 10.0, 4.0, 5.0, 6.0];
        let robust_gp = RobustGaussianProcessRegressor::builder()
            .kernel(Box::new(RBF::new(1.0)))
            .robust_likelihood(RobustLikelihood::student_t(3.0))
            .max_iterations(5)
            .build();

        let trained = robust_gp.fit(&X, &y).unwrap();
        let cv_score = trained.robust_cross_validation(3).unwrap();

        assert!(cv_score >= 0.0);
    }
}