1use super::helpers::*;
4use crate::matrix::FdMatrix;
5use crate::scalar_on_function::{
6 fregre_lm, functional_logistic, sigmoid, FregreLmResult, FunctionalLogisticResult,
7};
8use rand::prelude::*;
9
/// Calibration diagnostics for a fitted functional logistic model.
pub struct CalibrationDiagnosticsResult {
    /// Mean squared difference between predicted probabilities and 0/1 outcomes.
    pub brier_score: f64,
    /// Negative mean Bernoulli log-likelihood (probabilities clipped away from 0 and 1).
    pub log_loss: f64,
    /// Hosmer–Lemeshow chi-square statistic over equal-count probability groups.
    pub hosmer_lemeshow_chi2: f64,
    /// Degrees of freedom for the HL statistic (non-empty groups minus 2, floored at 1).
    pub hosmer_lemeshow_df: usize,
    /// Number of non-empty groups actually used (may be fewer than requested).
    pub n_groups: usize,
    /// Per-group (mean predicted probability, mean observed outcome) pairs.
    pub reliability_bins: Vec<(f64, f64)>,
    /// Number of observations in each non-empty group.
    pub bin_counts: Vec<usize>,
}
31
32pub fn calibration_diagnostics(
34 fit: &FunctionalLogisticResult,
35 y: &[f64],
36 n_groups: usize,
37) -> Option<CalibrationDiagnosticsResult> {
38 let n = fit.probabilities.len();
39 if n == 0 || n != y.len() || n_groups < 2 {
40 return None;
41 }
42
43 let brier_score: f64 = fit
45 .probabilities
46 .iter()
47 .zip(y)
48 .map(|(&p, &yi)| (p - yi).powi(2))
49 .sum::<f64>()
50 / n as f64;
51
52 let log_loss: f64 = -fit
54 .probabilities
55 .iter()
56 .zip(y)
57 .map(|(&p, &yi)| {
58 let p_clip = p.clamp(1e-15, 1.0 - 1e-15);
59 yi * p_clip.ln() + (1.0 - yi) * (1.0 - p_clip).ln()
60 })
61 .sum::<f64>()
62 / n as f64;
63
64 let (hosmer_lemeshow_chi2, reliability_bins, bin_counts) =
65 hosmer_lemeshow_computation(&fit.probabilities, y, n, n_groups);
66
67 let actual_groups = bin_counts.len();
68 let hosmer_lemeshow_df = if actual_groups > 2 {
69 actual_groups - 2
70 } else {
71 1
72 };
73
74 Some(CalibrationDiagnosticsResult {
75 brier_score,
76 log_loss,
77 hosmer_lemeshow_chi2,
78 hosmer_lemeshow_df,
79 n_groups: actual_groups,
80 reliability_bins,
81 bin_counts,
82 })
83}
84
/// Calibration-error summary for a fitted functional logistic model.
pub struct EceResult {
    /// Expected calibration error over equal-width probability bins.
    pub ece: f64,
    /// Maximum calibration error: largest per-bin |mean observed - mean predicted| gap.
    pub mce: f64,
    /// Adaptive calibration error over (nearly) equal-count bins of sorted probabilities.
    pub ace: f64,
    /// Number of bins requested.
    pub n_bins: usize,
    /// Per-bin count-weighted contribution to `ece` (0.0 for empty bins).
    pub bin_ece_contributions: Vec<f64>,
}
102
103pub fn expected_calibration_error(
110 fit: &FunctionalLogisticResult,
111 y: &[f64],
112 n_bins: usize,
113) -> Option<EceResult> {
114 let n = fit.probabilities.len();
115 if n == 0 || n != y.len() || n_bins == 0 {
116 return None;
117 }
118
119 let (ece, mce, bin_ece_contributions) =
120 compute_equal_width_ece(&fit.probabilities, y, n, n_bins);
121
122 let mut sorted_idx: Vec<usize> = (0..n).collect();
124 sorted_idx.sort_by(|&a, &b| {
125 fit.probabilities[a]
126 .partial_cmp(&fit.probabilities[b])
127 .unwrap_or(std::cmp::Ordering::Equal)
128 });
129 let group_size = n / n_bins.max(1);
130 let mut ace = 0.0;
131 let mut start = 0;
132 for g in 0..n_bins {
133 if start >= n {
134 break;
135 }
136 let end = if g < n_bins - 1 {
137 (start + group_size).min(n)
138 } else {
139 n
140 };
141 ace += calibration_gap_weighted(&sorted_idx[start..end], y, &fit.probabilities, n);
142 start = end;
143 }
144
145 Some(EceResult {
146 ece,
147 mce,
148 ace,
149 n_bins,
150 bin_ece_contributions,
151 })
152}
153
/// Split-conformal prediction intervals for a functional linear model.
pub struct ConformalPredictionResult {
    /// Point predictions on the test set.
    pub predictions: Vec<f64>,
    /// Lower interval bounds (`prediction - residual_quantile`).
    pub lower: Vec<f64>,
    /// Upper interval bounds (`prediction + residual_quantile`).
    pub upper: Vec<f64>,
    /// Quantile of the absolute calibration residuals used to widen the intervals.
    pub residual_quantile: f64,
    /// Coverage value paired with `residual_quantile` by
    /// `conformal_quantile_and_coverage` (defined in helpers — see that function
    /// for its exact definition).
    pub coverage: f64,
    /// Absolute residuals |y - yhat| on the calibration split.
    pub calibration_scores: Vec<f64>,
}
173
174pub fn conformal_prediction_residuals(
190 fit: &FregreLmResult,
191 train_data: &FdMatrix,
192 train_y: &[f64],
193 test_data: &FdMatrix,
194 scalar_covariates_train: Option<&FdMatrix>,
195 scalar_covariates_test: Option<&FdMatrix>,
196 cal_fraction: f64,
197 alpha: f64,
198 seed: u64,
199) -> Option<ConformalPredictionResult> {
200 let (n, m) = train_data.shape();
201 let (n_test, m_test) = test_data.shape();
202 let ncomp = fit.ncomp;
203 let (_n_cal, n_proper) = validate_conformal_inputs(
204 n,
205 m,
206 n_test,
207 m_test,
208 train_y.len(),
209 ncomp,
210 cal_fraction,
211 alpha,
212 )?;
213
214 let mut rng = StdRng::seed_from_u64(seed);
216 let mut all_idx: Vec<usize> = (0..n).collect();
217 all_idx.shuffle(&mut rng);
218 let proper_idx = &all_idx[..n_proper];
219 let cal_idx = &all_idx[n_proper..];
220
221 let proper_data = subsample_rows(train_data, proper_idx);
223 let proper_y: Vec<f64> = proper_idx.iter().map(|&i| train_y[i]).collect();
224 let proper_sc = scalar_covariates_train.map(|sc| subsample_rows(sc, proper_idx));
225
226 let refit = fregre_lm(&proper_data, &proper_y, proper_sc.as_ref(), ncomp)?;
228
229 let cal_data = subsample_rows(train_data, cal_idx);
231 let cal_sc = scalar_covariates_train.map(|sc| subsample_rows(sc, cal_idx));
232 let cal_scores = project_scores(&cal_data, &refit.fpca.mean, &refit.fpca.rotation, ncomp);
233 let cal_preds = predict_from_scores(
234 &cal_scores,
235 &refit.coefficients,
236 &refit.gamma,
237 cal_sc.as_ref(),
238 ncomp,
239 );
240 let cal_n = cal_idx.len();
241
242 let calibration_scores: Vec<f64> = cal_idx
243 .iter()
244 .enumerate()
245 .map(|(i, &orig)| (train_y[orig] - cal_preds[i]).abs())
246 .collect();
247
248 let (residual_quantile, coverage) =
249 conformal_quantile_and_coverage(&calibration_scores, cal_n, alpha);
250
251 let test_scores = project_scores(test_data, &refit.fpca.mean, &refit.fpca.rotation, ncomp);
253 let predictions = predict_from_scores(
254 &test_scores,
255 &refit.coefficients,
256 &refit.gamma,
257 scalar_covariates_test,
258 ncomp,
259 );
260
261 let lower: Vec<f64> = predictions.iter().map(|&p| p - residual_quantile).collect();
262 let upper: Vec<f64> = predictions.iter().map(|&p| p + residual_quantile).collect();
263
264 Some(ConformalPredictionResult {
265 predictions,
266 lower,
267 upper,
268 residual_quantile,
269 coverage,
270 calibration_scores,
271 })
272}
273
/// Functional depth notion used for score and coefficient depths.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DepthType {
    /// Fraiman–Muniz depth.
    FraimanMuniz,
    /// Modified band depth.
    ModifiedBand,
    /// Functional spatial depth.
    FunctionalSpatial,
}
285
/// Depth diagnostics for a fitted scalar-on-function regression.
pub struct RegressionDepthResult {
    /// Depth of the fitted slope coefficients within the bootstrap coefficient cloud.
    pub beta_depth: f64,
    /// Per-observation depth of each row's FPCA scores.
    pub score_depths: Vec<f64>,
    /// Mean of `score_depths`.
    pub mean_score_depth: f64,
    /// Depth notion used for both computations.
    pub depth_type: DepthType,
    /// Number of bootstrap refits that succeeded.
    pub n_boot_success: usize,
}
299
300pub fn regression_depth(
314 fit: &FregreLmResult,
315 data: &FdMatrix,
316 y: &[f64],
317 scalar_covariates: Option<&FdMatrix>,
318 n_boot: usize,
319 depth_type: DepthType,
320 seed: u64,
321) -> Option<RegressionDepthResult> {
322 let (n, m) = data.shape();
323 if n < 4 || m == 0 || n != y.len() || n_boot == 0 {
324 return None;
325 }
326 let ncomp = fit.ncomp;
327 let scores = project_scores(data, &fit.fpca.mean, &fit.fpca.rotation, ncomp);
328 let score_depths = compute_score_depths(&scores, depth_type);
329 if score_depths.is_empty() {
330 return None;
331 }
332 let mean_score_depth = score_depths.iter().sum::<f64>() / score_depths.len() as f64;
333
334 let orig_coefs: Vec<f64> = (0..ncomp).map(|k| fit.coefficients[1 + k]).collect();
335 let mut rng = StdRng::seed_from_u64(seed);
336 let mut boot_coefs = Vec::new();
337 for _ in 0..n_boot {
338 let idx: Vec<usize> = (0..n).map(|_| rng.gen_range(0..n)).collect();
339 let boot_data = subsample_rows(data, &idx);
340 let boot_y: Vec<f64> = idx.iter().map(|&i| y[i]).collect();
341 let boot_sc = scalar_covariates.map(|sc| subsample_rows(sc, &idx));
342 if let Some(refit) = fregre_lm(&boot_data, &boot_y, boot_sc.as_ref(), ncomp) {
343 boot_coefs.push((0..ncomp).map(|k| refit.coefficients[1 + k]).collect());
344 }
345 }
346
347 let beta_depth = beta_depth_from_bootstrap(&boot_coefs, &orig_coefs, ncomp, depth_type);
348
349 Some(RegressionDepthResult {
350 beta_depth,
351 score_depths,
352 mean_score_depth,
353 depth_type,
354 n_boot_success: boot_coefs.len(),
355 })
356}
357
358pub fn regression_depth_logistic(
360 fit: &FunctionalLogisticResult,
361 data: &FdMatrix,
362 y: &[f64],
363 scalar_covariates: Option<&FdMatrix>,
364 n_boot: usize,
365 depth_type: DepthType,
366 seed: u64,
367) -> Option<RegressionDepthResult> {
368 let (n, m) = data.shape();
369 if n < 4 || m == 0 || n != y.len() || n_boot == 0 {
370 return None;
371 }
372 let ncomp = fit.ncomp;
373 let scores = project_scores(data, &fit.fpca.mean, &fit.fpca.rotation, ncomp);
374 let score_depths = compute_score_depths(&scores, depth_type);
375 if score_depths.is_empty() {
376 return None;
377 }
378 let mean_score_depth = score_depths.iter().sum::<f64>() / score_depths.len() as f64;
379
380 let orig_coefs: Vec<f64> = (0..ncomp).map(|k| fit.coefficients[1 + k]).collect();
381 let mut rng = StdRng::seed_from_u64(seed);
382 let boot_coefs =
383 bootstrap_logistic_coefs(data, y, scalar_covariates, n, ncomp, n_boot, &mut rng);
384
385 let beta_depth = beta_depth_from_bootstrap(&boot_coefs, &orig_coefs, ncomp, depth_type);
386
387 Some(RegressionDepthResult {
388 beta_depth,
389 score_depths,
390 mean_score_depth,
391 depth_type,
392 n_boot_success: boot_coefs.len(),
393 })
394}
395
/// Bootstrap stability summary of a model explanation.
///
/// NOTE(review): these fields are populated by `build_stability_result`
/// (defined in helpers, not visible here); the per-field descriptions below
/// are inferred from the field names — confirm against that helper.
pub struct StabilityAnalysisResult {
    /// Pointwise standard deviation of beta(t) across bootstrap refits.
    pub beta_t_std: Vec<f64>,
    /// Standard deviation of each slope coefficient across refits.
    pub coefficient_std: Vec<f64>,
    /// Standard deviation of the fit metric (R² for linear, accuracy for logistic).
    pub metric_std: f64,
    /// Pointwise coefficient of variation of beta(t) across refits.
    pub beta_t_cv: Vec<f64>,
    /// Stability measure of the |coefficient| importance ordering across refits.
    pub importance_stability: f64,
    /// Number of bootstrap refits that succeeded.
    pub n_boot_success: usize,
}
415
416pub fn explanation_stability(
421 data: &FdMatrix,
422 y: &[f64],
423 scalar_covariates: Option<&FdMatrix>,
424 ncomp: usize,
425 n_boot: usize,
426 seed: u64,
427) -> Option<StabilityAnalysisResult> {
428 let (n, m) = data.shape();
429 if n < 4 || m == 0 || n != y.len() || n_boot < 2 || ncomp == 0 {
430 return None;
431 }
432
433 let mut rng = StdRng::seed_from_u64(seed);
434 let mut all_beta_t: Vec<Vec<f64>> = Vec::new();
435 let mut all_coefs: Vec<Vec<f64>> = Vec::new();
436 let mut all_metrics: Vec<f64> = Vec::new();
437 let mut all_abs_coefs: Vec<Vec<f64>> = Vec::new();
438
439 for _ in 0..n_boot {
440 let idx: Vec<usize> = (0..n).map(|_| rng.gen_range(0..n)).collect();
441 let boot_data = subsample_rows(data, &idx);
442 let boot_y: Vec<f64> = idx.iter().map(|&i| y[i]).collect();
443 let boot_sc = scalar_covariates.map(|sc| subsample_rows(sc, &idx));
444 if let Some(refit) = fregre_lm(&boot_data, &boot_y, boot_sc.as_ref(), ncomp) {
445 all_beta_t.push(refit.beta_t.clone());
446 let coefs: Vec<f64> = (0..ncomp).map(|k| refit.coefficients[1 + k]).collect();
447 all_abs_coefs.push(coefs.iter().map(|c| c.abs()).collect());
448 all_coefs.push(coefs);
449 all_metrics.push(refit.r_squared);
450 }
451 }
452
453 build_stability_result(
454 &all_beta_t,
455 &all_coefs,
456 &all_abs_coefs,
457 &all_metrics,
458 m,
459 ncomp,
460 )
461}
462
463pub fn explanation_stability_logistic(
465 data: &FdMatrix,
466 y: &[f64],
467 scalar_covariates: Option<&FdMatrix>,
468 ncomp: usize,
469 n_boot: usize,
470 seed: u64,
471) -> Option<StabilityAnalysisResult> {
472 let (n, m) = data.shape();
473 if n < 4 || m == 0 || n != y.len() || n_boot < 2 || ncomp == 0 {
474 return None;
475 }
476
477 let mut rng = StdRng::seed_from_u64(seed);
478 let (all_beta_t, all_coefs, all_abs_coefs, all_metrics) =
479 bootstrap_logistic_stability(data, y, scalar_covariates, n, ncomp, n_boot, &mut rng);
480
481 build_stability_result(
482 &all_beta_t,
483 &all_coefs,
484 &all_abs_coefs,
485 &all_metrics,
486 m,
487 ncomp,
488 )
489}
490
/// One interval condition on a single FPCA score component.
///
/// NOTE(review): conditions are produced by `anchor_beam_search` (defined in
/// helpers, not visible here); bound inclusivity is set there — confirm.
pub struct AnchorCondition {
    /// Index of the FPCA score component the condition applies to.
    pub component: usize,
    /// Lower bound of the score interval.
    pub lower_bound: f64,
    /// Upper bound of the score interval.
    pub upper_bound: f64,
}
504
/// A conjunction of score conditions together with its quality metrics.
///
/// NOTE(review): `precision`, `coverage`, and `n_matching` are computed inside
/// `anchor_beam_search` (helpers); descriptions below follow the usual anchor
/// terminology and should be confirmed against that function.
pub struct AnchorRule {
    /// Conditions that must all hold for an observation to match the rule.
    pub conditions: Vec<AnchorCondition>,
    /// Presumably the fraction of matching observations sharing the anchored prediction.
    pub precision: f64,
    /// Presumably the fraction of all observations that match the rule.
    pub coverage: f64,
    /// Number of observations matching the rule.
    pub n_matching: usize,
}
516
/// Anchor explanation for a single observation.
pub struct AnchorResult {
    /// Best rule found by the beam search.
    pub rule: AnchorRule,
    /// Row index of the explained observation.
    pub observation: usize,
    /// Model prediction for the observation: the fitted value for the linear
    /// model, or the predicted probability for the logistic model.
    pub predicted_value: f64,
}
526
527pub fn anchor_explanation(
540 fit: &FregreLmResult,
541 data: &FdMatrix,
542 scalar_covariates: Option<&FdMatrix>,
543 observation: usize,
544 precision_threshold: f64,
545 n_bins: usize,
546) -> Option<AnchorResult> {
547 let (n, m) = data.shape();
548 if n == 0 || m != fit.fpca.mean.len() || observation >= n || n_bins < 2 {
549 return None;
550 }
551 let ncomp = fit.ncomp;
552 let scores = project_scores(data, &fit.fpca.mean, &fit.fpca.rotation, ncomp);
553 let obs_pred = fit.fitted_values[observation];
554 let tol = fit.residual_se;
555
556 let same_pred = |i: usize| -> bool {
558 let mut yhat = fit.coefficients[0];
559 for k in 0..ncomp {
560 yhat += fit.coefficients[1 + k] * scores[(i, k)];
561 }
562 if let Some(sc) = scalar_covariates {
563 for j in 0..fit.gamma.len() {
564 yhat += fit.gamma[j] * sc[(i, j)];
565 }
566 }
567 (yhat - obs_pred).abs() <= tol
568 };
569
570 let (rule, _) = anchor_beam_search(
571 &scores,
572 ncomp,
573 n,
574 observation,
575 precision_threshold,
576 n_bins,
577 &same_pred,
578 );
579
580 Some(AnchorResult {
581 rule,
582 observation,
583 predicted_value: obs_pred,
584 })
585}
586
587pub fn anchor_explanation_logistic(
591 fit: &FunctionalLogisticResult,
592 data: &FdMatrix,
593 scalar_covariates: Option<&FdMatrix>,
594 observation: usize,
595 precision_threshold: f64,
596 n_bins: usize,
597) -> Option<AnchorResult> {
598 let (n, m) = data.shape();
599 if n == 0 || m != fit.fpca.mean.len() || observation >= n || n_bins < 2 {
600 return None;
601 }
602 let ncomp = fit.ncomp;
603 let scores = project_scores(data, &fit.fpca.mean, &fit.fpca.rotation, ncomp);
604 let obs_class = fit.predicted_classes[observation];
605 let obs_prob = fit.probabilities[observation];
606 let p_scalar = fit.gamma.len();
607
608 let same_pred = |i: usize| -> bool {
610 let mut eta = fit.intercept;
611 for k in 0..ncomp {
612 eta += fit.coefficients[1 + k] * scores[(i, k)];
613 }
614 if let Some(sc) = scalar_covariates {
615 for j in 0..p_scalar {
616 eta += fit.gamma[j] * sc[(i, j)];
617 }
618 }
619 let pred_class = if sigmoid(eta) >= 0.5 { 1u8 } else { 0u8 };
620 pred_class == obs_class
621 };
622
623 let (rule, _) = anchor_beam_search(
624 &scores,
625 ncomp,
626 n,
627 observation,
628 precision_threshold,
629 n_bins,
630 &same_pred,
631 );
632
633 Some(AnchorResult {
634 rule,
635 observation,
636 predicted_value: obs_prob,
637 })
638}
639
/// Hosmer–Lemeshow chi-square over `n_groups` (nearly) equal-count groups of
/// observations ordered by predicted probability.
///
/// Returns `(chi2, reliability_bins, bin_counts)`, where each reliability bin
/// is `(mean predicted, mean observed)` for a non-empty group. Groups whose
/// variance term is ~0 (mean probability at 0 or 1) contribute nothing to chi2.
fn hosmer_lemeshow_computation(
    probabilities: &[f64],
    y: &[f64],
    n: usize,
    n_groups: usize,
) -> (f64, Vec<(f64, f64)>, Vec<usize>) {
    // Order observations by ascending probability; incomparable (NaN) pairs
    // are treated as equal.
    let mut order: Vec<usize> = (0..n).collect();
    order.sort_by(|&a, &b| {
        probabilities[a]
            .partial_cmp(&probabilities[b])
            .unwrap_or(std::cmp::Ordering::Equal)
    });

    // The first `n % n_groups` groups each take one extra observation.
    let base = n / n_groups;
    let extra = n % n_groups;

    let mut chi2 = 0.0;
    let mut reliability_bins = Vec::with_capacity(n_groups);
    let mut bin_counts = Vec::with_capacity(n_groups);
    let mut lo = 0;

    for g in 0..n_groups {
        let sz = base + usize::from(g < extra);
        let members = &order[lo..lo + sz];
        lo += sz;
        if members.is_empty() {
            continue;
        }

        let ng = members.len() as f64;
        let observed: f64 = members.iter().map(|&i| y[i]).sum();
        let expected: f64 = members.iter().map(|&i| probabilities[i]).sum();
        let p_bar = expected / ng;
        reliability_bins.push((p_bar, observed / ng));
        bin_counts.push(members.len());

        // chi2 term: (O - E)^2 / (n * p * (1 - p)); skip degenerate groups.
        let denom = ng * p_bar * (1.0 - p_bar);
        if denom > 1e-15 {
            chi2 += (observed - expected).powi(2) / denom;
        }
    }

    (chi2, reliability_bins, bin_counts)
}
691
/// ECE and MCE over `n_bins` equal-width probability bins on [0, 1].
///
/// Returns `(ece, mce, per-bin ECE contributions)`; empty bins contribute 0.
fn compute_equal_width_ece(
    probabilities: &[f64],
    y: &[f64],
    n: usize,
    n_bins: usize,
) -> (f64, f64, Vec<f64>) {
    let mut sum_y = vec![0.0; n_bins];
    let mut sum_p = vec![0.0; n_bins];
    let mut count = vec![0usize; n_bins];

    // Assign each observation to its bin; p == 1.0 lands in the last bin.
    for i in 0..n {
        let p = probabilities[i];
        let b = ((p * n_bins as f64).floor() as usize).min(n_bins - 1);
        sum_y[b] += y[i];
        sum_p[b] += p;
        count[b] += 1;
    }

    let mut ece = 0.0;
    let mut mce = 0.0_f64;
    let mut contributions = vec![0.0; n_bins];
    for b in 0..n_bins {
        if count[b] == 0 {
            continue;
        }
        let c = count[b] as f64;
        // |mean observed - mean predicted| within the bin, weighted by bin mass.
        let gap = (sum_y[b] / c - sum_p[b] / c).abs();
        contributions[b] = c / n as f64 * gap;
        ece += contributions[b];
        mce = mce.max(gap);
    }

    (ece, mce, contributions)
}
729
730fn bootstrap_logistic_stability(
732 data: &FdMatrix,
733 y: &[f64],
734 scalar_covariates: Option<&FdMatrix>,
735 n: usize,
736 ncomp: usize,
737 n_boot: usize,
738 rng: &mut StdRng,
739) -> (Vec<Vec<f64>>, Vec<Vec<f64>>, Vec<Vec<f64>>, Vec<f64>) {
740 let mut all_beta_t: Vec<Vec<f64>> = Vec::new();
741 let mut all_coefs: Vec<Vec<f64>> = Vec::new();
742 let mut all_abs_coefs: Vec<Vec<f64>> = Vec::new();
743 let mut all_metrics: Vec<f64> = Vec::new();
744
745 for _ in 0..n_boot {
746 let idx: Vec<usize> = (0..n).map(|_| rng.gen_range(0..n)).collect();
747 let boot_data = subsample_rows(data, &idx);
748 let boot_y: Vec<f64> = idx.iter().map(|&i| y[i]).collect();
749 let boot_sc = scalar_covariates.map(|sc| subsample_rows(sc, &idx));
750 let has_both = boot_y.iter().any(|&v| v < 0.5) && boot_y.iter().any(|&v| v >= 0.5);
751 if !has_both {
752 continue;
753 }
754 if let Some(refit) =
755 functional_logistic(&boot_data, &boot_y, boot_sc.as_ref(), ncomp, 25, 1e-6)
756 {
757 all_beta_t.push(refit.beta_t.clone());
758 let coefs: Vec<f64> = (0..ncomp).map(|k| refit.coefficients[1 + k]).collect();
759 all_abs_coefs.push(coefs.iter().map(|c| c.abs()).collect());
760 all_coefs.push(coefs);
761 all_metrics.push(refit.accuracy);
762 }
763 }
764
765 (all_beta_t, all_coefs, all_abs_coefs, all_metrics)
766}
767
768fn bootstrap_logistic_coefs(
770 data: &FdMatrix,
771 y: &[f64],
772 scalar_covariates: Option<&FdMatrix>,
773 n: usize,
774 ncomp: usize,
775 n_boot: usize,
776 rng: &mut StdRng,
777) -> Vec<Vec<f64>> {
778 let mut boot_coefs = Vec::new();
779 for _ in 0..n_boot {
780 let idx: Vec<usize> = (0..n).map(|_| rng.gen_range(0..n)).collect();
781 let boot_data = subsample_rows(data, &idx);
782 let boot_y: Vec<f64> = idx.iter().map(|&i| y[i]).collect();
783 let boot_sc = scalar_covariates.map(|sc| subsample_rows(sc, &idx));
784 let has_both = boot_y.iter().any(|&v| v < 0.5) && boot_y.iter().any(|&v| v >= 0.5);
785 if !has_both {
786 continue;
787 }
788 if let Some(refit) =
789 functional_logistic(&boot_data, &boot_y, boot_sc.as_ref(), ncomp, 25, 1e-6)
790 {
791 boot_coefs.push((0..ncomp).map(|k| refit.coefficients[1 + k]).collect());
792 }
793 }
794 boot_coefs
795}