1use super::helpers::{
4 anchor_beam_search, beta_depth_from_bootstrap, build_stability_result,
5 calibration_gap_weighted, compute_score_depths, conformal_quantile_and_coverage,
6 predict_from_scores, project_scores, subsample_rows, validate_conformal_inputs,
7};
8use crate::error::FdarError;
9use crate::matrix::FdMatrix;
10use crate::scalar_on_function::{
11 fregre_lm, functional_logistic, sigmoid, FregreLmResult, FunctionalLogisticResult,
12};
13use rand::prelude::*;
14
/// Output of [`calibration_diagnostics`]: summary measures of how well the
/// predicted probabilities of a functional logistic fit match the observed
/// binary outcomes.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub struct CalibrationDiagnosticsResult {
    /// Mean squared difference between predicted probability and outcome.
    pub brier_score: f64,
    /// Mean negative log-likelihood; probabilities are clipped to
    /// `[1e-15, 1 - 1e-15]` before taking logarithms.
    pub log_loss: f64,
    /// Hosmer-Lemeshow chi-squared statistic over probability-sorted groups.
    pub hosmer_lemeshow_chi2: f64,
    /// Degrees of freedom: `n_groups - 2`, floored at 1.
    pub hosmer_lemeshow_df: usize,
    /// Number of non-empty groups actually formed (can be smaller than the
    /// requested group count when `n < n_groups`).
    pub n_groups: usize,
    /// Per group: (mean predicted probability, mean observed outcome).
    pub reliability_bins: Vec<(f64, f64)>,
    /// Number of observations in each non-empty group.
    pub bin_counts: Vec<usize>,
}
38
39#[must_use = "expensive computation whose result should not be discarded"]
47pub fn calibration_diagnostics(
48 fit: &FunctionalLogisticResult,
49 y: &[f64],
50 n_groups: usize,
51) -> Result<CalibrationDiagnosticsResult, FdarError> {
52 let n = fit.probabilities.len();
53 if n == 0 {
54 return Err(FdarError::InvalidDimension {
55 parameter: "probabilities",
56 expected: ">0 length".into(),
57 actual: "0".into(),
58 });
59 }
60 if n != y.len() {
61 return Err(FdarError::InvalidDimension {
62 parameter: "y",
63 expected: format!("{n} (matching probabilities)"),
64 actual: format!("{}", y.len()),
65 });
66 }
67 if n_groups < 2 {
68 return Err(FdarError::InvalidParameter {
69 parameter: "n_groups",
70 message: "must be >= 2".into(),
71 });
72 }
73
74 let brier_score: f64 = fit
76 .probabilities
77 .iter()
78 .zip(y)
79 .map(|(&p, &yi)| (p - yi).powi(2))
80 .sum::<f64>()
81 / n as f64;
82
83 let log_loss: f64 = -fit
85 .probabilities
86 .iter()
87 .zip(y)
88 .map(|(&p, &yi)| {
89 let p_clip = p.clamp(1e-15, 1.0 - 1e-15);
90 yi * p_clip.ln() + (1.0 - yi) * (1.0 - p_clip).ln()
91 })
92 .sum::<f64>()
93 / n as f64;
94
95 let (hosmer_lemeshow_chi2, reliability_bins, bin_counts) =
96 hosmer_lemeshow_computation(&fit.probabilities, y, n, n_groups);
97
98 let actual_groups = bin_counts.len();
99 let hosmer_lemeshow_df = if actual_groups > 2 {
100 actual_groups - 2
101 } else {
102 1
103 };
104
105 Ok(CalibrationDiagnosticsResult {
106 brier_score,
107 log_loss,
108 hosmer_lemeshow_chi2,
109 hosmer_lemeshow_df,
110 n_groups: actual_groups,
111 reliability_bins,
112 bin_counts,
113 })
114}
115
/// Output of [`expected_calibration_error`].
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub struct EceResult {
    /// Expected calibration error: count-weighted mean absolute gap between
    /// mean outcome and mean predicted probability over equal-width bins.
    pub ece: f64,
    /// Maximum calibration error: the largest per-bin gap.
    pub mce: f64,
    /// Adaptive calibration error over (roughly) equal-count groups formed
    /// after sorting by predicted probability.
    pub ace: f64,
    /// Number of bins requested by the caller.
    pub n_bins: usize,
    /// Per-bin weighted gap contribution to `ece` (zero for empty bins).
    pub bin_ece_contributions: Vec<f64>,
}
135
136#[must_use = "expensive computation whose result should not be discarded"]
149pub fn expected_calibration_error(
150 fit: &FunctionalLogisticResult,
151 y: &[f64],
152 n_bins: usize,
153) -> Result<EceResult, FdarError> {
154 let n = fit.probabilities.len();
155 if n == 0 {
156 return Err(FdarError::InvalidDimension {
157 parameter: "probabilities",
158 expected: ">0 length".into(),
159 actual: "0".into(),
160 });
161 }
162 if n != y.len() {
163 return Err(FdarError::InvalidDimension {
164 parameter: "y",
165 expected: format!("{n} (matching probabilities)"),
166 actual: format!("{}", y.len()),
167 });
168 }
169 if n_bins == 0 {
170 return Err(FdarError::InvalidParameter {
171 parameter: "n_bins",
172 message: "must be > 0".into(),
173 });
174 }
175
176 let (ece, mce, bin_ece_contributions) =
177 compute_equal_width_ece(&fit.probabilities, y, n, n_bins);
178
179 let mut sorted_idx: Vec<usize> = (0..n).collect();
181 sorted_idx.sort_by(|&a, &b| {
182 fit.probabilities[a]
183 .partial_cmp(&fit.probabilities[b])
184 .unwrap_or(std::cmp::Ordering::Equal)
185 });
186 let group_size = n / n_bins.max(1);
187 let mut ace = 0.0;
188 let mut start = 0;
189 for g in 0..n_bins {
190 if start >= n {
191 break;
192 }
193 let end = if g < n_bins - 1 {
194 (start + group_size).min(n)
195 } else {
196 n
197 };
198 ace += calibration_gap_weighted(&sorted_idx[start..end], y, &fit.probabilities, n);
199 start = end;
200 }
201
202 Ok(EceResult {
203 ece,
204 mce,
205 ace,
206 n_bins,
207 bin_ece_contributions,
208 })
209}
210
/// Output of [`conformal_prediction_residuals`]: split-conformal prediction
/// intervals for a functional linear model.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub struct ConformalPredictionResult {
    /// Point predictions on the test set, from the proper-split refit.
    pub predictions: Vec<f64>,
    /// `predictions - residual_quantile`, elementwise.
    pub lower: Vec<f64>,
    /// `predictions + residual_quantile`, elementwise.
    pub upper: Vec<f64>,
    /// Conformal quantile of the absolute calibration residuals
    /// (computed by `conformal_quantile_and_coverage`).
    pub residual_quantile: f64,
    /// Empirical coverage on the calibration split (from the same helper).
    pub coverage: f64,
    /// Absolute residuals |y - prediction| on the calibration split.
    pub calibration_scores: Vec<f64>,
}
232
233#[must_use = "expensive computation whose result should not be discarded"]
257pub fn conformal_prediction_residuals(
258 fit: &FregreLmResult,
259 train_data: &FdMatrix,
260 train_y: &[f64],
261 test_data: &FdMatrix,
262 scalar_covariates_train: Option<&FdMatrix>,
263 scalar_covariates_test: Option<&FdMatrix>,
264 cal_fraction: f64,
265 alpha: f64,
266 seed: u64,
267) -> Result<ConformalPredictionResult, FdarError> {
268 let (n, m) = train_data.shape();
269 let (n_test, m_test) = test_data.shape();
270 let ncomp = fit.ncomp;
271 let (_n_cal, n_proper) = validate_conformal_inputs(
272 n,
273 m,
274 n_test,
275 m_test,
276 train_y.len(),
277 ncomp,
278 cal_fraction,
279 alpha,
280 )
281 .ok_or_else(|| FdarError::InvalidParameter {
282 parameter: "conformal_prediction_residuals",
283 message: "invalid input dimensions or parameters".into(),
284 })?;
285
286 let mut rng = StdRng::seed_from_u64(seed);
288 let mut all_idx: Vec<usize> = (0..n).collect();
289 all_idx.shuffle(&mut rng);
290 let proper_idx = &all_idx[..n_proper];
291 let cal_idx = &all_idx[n_proper..];
292
293 let proper_data = subsample_rows(train_data, proper_idx);
295 let proper_y: Vec<f64> = proper_idx.iter().map(|&i| train_y[i]).collect();
296 let proper_sc = scalar_covariates_train.map(|sc| subsample_rows(sc, proper_idx));
297
298 let refit = fregre_lm(&proper_data, &proper_y, proper_sc.as_ref(), ncomp)?;
300
301 let cal_data = subsample_rows(train_data, cal_idx);
303 let cal_sc = scalar_covariates_train.map(|sc| subsample_rows(sc, cal_idx));
304 let cal_scores = project_scores(&cal_data, &refit.fpca.mean, &refit.fpca.rotation, ncomp);
305 let cal_preds = predict_from_scores(
306 &cal_scores,
307 &refit.coefficients,
308 &refit.gamma,
309 cal_sc.as_ref(),
310 ncomp,
311 );
312 let cal_n = cal_idx.len();
313
314 let calibration_scores: Vec<f64> = cal_idx
315 .iter()
316 .enumerate()
317 .map(|(i, &orig)| (train_y[orig] - cal_preds[i]).abs())
318 .collect();
319
320 let (residual_quantile, coverage) =
321 conformal_quantile_and_coverage(&calibration_scores, cal_n, alpha);
322
323 let test_scores = project_scores(test_data, &refit.fpca.mean, &refit.fpca.rotation, ncomp);
325 let predictions = predict_from_scores(
326 &test_scores,
327 &refit.coefficients,
328 &refit.gamma,
329 scalar_covariates_test,
330 ncomp,
331 );
332
333 let lower: Vec<f64> = predictions.iter().map(|&p| p - residual_quantile).collect();
334 let upper: Vec<f64> = predictions.iter().map(|&p| p + residual_quantile).collect();
335
336 Ok(ConformalPredictionResult {
337 predictions,
338 lower,
339 upper,
340 residual_quantile,
341 coverage,
342 calibration_scores,
343 })
344}
345
/// Depth notion used when ranking FPCA score vectors and bootstrap
/// coefficient vectors; the concrete computations are delegated to
/// `compute_score_depths` / `beta_depth_from_bootstrap` in the helpers
/// module.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum DepthType {
    /// Fraiman-Muniz depth.
    FraimanMuniz,
    /// Modified band depth.
    ModifiedBand,
    /// Functional spatial depth.
    FunctionalSpatial,
}
358
/// Output of [`regression_depth`] / [`regression_depth_logistic`].
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub struct RegressionDepthResult {
    /// Depth of the fitted coefficient vector within the cloud of
    /// bootstrap-refit coefficient vectors.
    pub beta_depth: f64,
    /// Depth of each observation's FPCA score vector.
    pub score_depths: Vec<f64>,
    /// Mean of `score_depths`.
    pub mean_score_depth: f64,
    /// Depth notion used for both computations.
    pub depth_type: DepthType,
    /// Number of bootstrap refits that succeeded (out of `n_boot` attempts).
    pub n_boot_success: usize,
}
374
375#[must_use = "expensive computation whose result should not be discarded"]
397pub fn regression_depth(
398 fit: &FregreLmResult,
399 data: &FdMatrix,
400 y: &[f64],
401 scalar_covariates: Option<&FdMatrix>,
402 n_boot: usize,
403 depth_type: DepthType,
404 seed: u64,
405) -> Result<RegressionDepthResult, FdarError> {
406 let (n, m) = data.shape();
407 if n < 4 {
408 return Err(FdarError::InvalidDimension {
409 parameter: "data",
410 expected: ">=4 rows".into(),
411 actual: format!("{n}"),
412 });
413 }
414 if m == 0 {
415 return Err(FdarError::InvalidDimension {
416 parameter: "data",
417 expected: ">0 columns".into(),
418 actual: "0".into(),
419 });
420 }
421 if n != y.len() {
422 return Err(FdarError::InvalidDimension {
423 parameter: "y",
424 expected: format!("{n} (matching data rows)"),
425 actual: format!("{}", y.len()),
426 });
427 }
428 if n_boot == 0 {
429 return Err(FdarError::InvalidParameter {
430 parameter: "n_boot",
431 message: "must be > 0".into(),
432 });
433 }
434 let ncomp = fit.ncomp;
435 let scores = project_scores(data, &fit.fpca.mean, &fit.fpca.rotation, ncomp);
436 let score_depths = compute_score_depths(&scores, depth_type);
437 if score_depths.is_empty() {
438 return Err(FdarError::ComputationFailed {
439 operation: "regression_depth",
440 detail: "score depth computation returned empty; try increasing ncomp or check that the score matrix has sufficient variability".into(),
441 });
442 }
443 let mean_score_depth = score_depths.iter().sum::<f64>() / score_depths.len() as f64;
444
445 let orig_coefs: Vec<f64> = (0..ncomp).map(|k| fit.coefficients[1 + k]).collect();
446 let mut rng = StdRng::seed_from_u64(seed);
447 let mut boot_coefs = Vec::new();
448 for _ in 0..n_boot {
449 let idx: Vec<usize> = (0..n).map(|_| rng.gen_range(0..n)).collect();
450 let boot_data = subsample_rows(data, &idx);
451 let boot_y: Vec<f64> = idx.iter().map(|&i| y[i]).collect();
452 let boot_sc = scalar_covariates.map(|sc| subsample_rows(sc, &idx));
453 if let Ok(refit) = fregre_lm(&boot_data, &boot_y, boot_sc.as_ref(), ncomp) {
454 boot_coefs.push((0..ncomp).map(|k| refit.coefficients[1 + k]).collect());
455 }
456 }
457
458 let beta_depth = beta_depth_from_bootstrap(&boot_coefs, &orig_coefs, ncomp, depth_type);
459
460 Ok(RegressionDepthResult {
461 beta_depth,
462 score_depths,
463 mean_score_depth,
464 depth_type,
465 n_boot_success: boot_coefs.len(),
466 })
467}
468
469#[must_use = "expensive computation whose result should not be discarded"]
479pub fn regression_depth_logistic(
480 fit: &FunctionalLogisticResult,
481 data: &FdMatrix,
482 y: &[f64],
483 scalar_covariates: Option<&FdMatrix>,
484 n_boot: usize,
485 depth_type: DepthType,
486 seed: u64,
487) -> Result<RegressionDepthResult, FdarError> {
488 let (n, m) = data.shape();
489 if n < 4 {
490 return Err(FdarError::InvalidDimension {
491 parameter: "data",
492 expected: ">=4 rows".into(),
493 actual: format!("{n}"),
494 });
495 }
496 if m == 0 {
497 return Err(FdarError::InvalidDimension {
498 parameter: "data",
499 expected: ">0 columns".into(),
500 actual: "0".into(),
501 });
502 }
503 if n != y.len() {
504 return Err(FdarError::InvalidDimension {
505 parameter: "y",
506 expected: format!("{n} (matching data rows)"),
507 actual: format!("{}", y.len()),
508 });
509 }
510 if n_boot == 0 {
511 return Err(FdarError::InvalidParameter {
512 parameter: "n_boot",
513 message: "must be > 0".into(),
514 });
515 }
516 let ncomp = fit.ncomp;
517 let scores = project_scores(data, &fit.fpca.mean, &fit.fpca.rotation, ncomp);
518 let score_depths = compute_score_depths(&scores, depth_type);
519 if score_depths.is_empty() {
520 return Err(FdarError::ComputationFailed {
521 operation: "regression_depth_logistic",
522 detail: "score depth computation returned empty; try increasing ncomp or check that the score matrix has sufficient variability".into(),
523 });
524 }
525 let mean_score_depth = score_depths.iter().sum::<f64>() / score_depths.len() as f64;
526
527 let orig_coefs: Vec<f64> = (0..ncomp).map(|k| fit.coefficients[1 + k]).collect();
528 let mut rng = StdRng::seed_from_u64(seed);
529 let boot_coefs =
530 bootstrap_logistic_coefs(data, y, scalar_covariates, n, ncomp, n_boot, &mut rng);
531
532 let beta_depth = beta_depth_from_bootstrap(&boot_coefs, &orig_coefs, ncomp, depth_type);
533
534 Ok(RegressionDepthResult {
535 beta_depth,
536 score_depths,
537 mean_score_depth,
538 depth_type,
539 n_boot_success: boot_coefs.len(),
540 })
541}
542
/// Output of [`explanation_stability`] / [`explanation_stability_logistic`]:
/// bootstrap variability of a model's explanation quantities.
///
/// The statistics are assembled by `build_stability_result` in the helpers
/// module; see that function for the exact definitions. Field descriptions
/// below follow the field names — confirm against the helper.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub struct StabilityAnalysisResult {
    /// Pointwise spread of the coefficient curve beta(t) across refits.
    pub beta_t_std: Vec<f64>,
    /// Spread of each FPCA coefficient across refits.
    pub coefficient_std: Vec<f64>,
    /// Spread of the fit metric across refits (R² for the linear model,
    /// accuracy for the logistic model — see the callers).
    pub metric_std: f64,
    /// Presumably the pointwise coefficient of variation of beta(t) —
    /// verify against `build_stability_result`.
    pub beta_t_cv: Vec<f64>,
    /// Stability of the |coefficient| importance values across refits
    /// (built from the absolute coefficients collected by the callers).
    pub importance_stability: f64,
    /// Number of bootstrap refits that succeeded.
    pub n_boot_success: usize,
}
564
565#[must_use = "expensive computation whose result should not be discarded"]
578pub fn explanation_stability(
579 data: &FdMatrix,
580 y: &[f64],
581 scalar_covariates: Option<&FdMatrix>,
582 ncomp: usize,
583 n_boot: usize,
584 seed: u64,
585) -> Result<StabilityAnalysisResult, FdarError> {
586 let (n, m) = data.shape();
587 if n < 4 {
588 return Err(FdarError::InvalidDimension {
589 parameter: "data",
590 expected: ">=4 rows".into(),
591 actual: format!("{n}"),
592 });
593 }
594 if m == 0 {
595 return Err(FdarError::InvalidDimension {
596 parameter: "data",
597 expected: ">0 columns".into(),
598 actual: "0".into(),
599 });
600 }
601 if n != y.len() {
602 return Err(FdarError::InvalidDimension {
603 parameter: "y",
604 expected: format!("{n} (matching data rows)"),
605 actual: format!("{}", y.len()),
606 });
607 }
608 if n_boot < 2 {
609 return Err(FdarError::InvalidParameter {
610 parameter: "n_boot",
611 message: "must be >= 2".into(),
612 });
613 }
614 if ncomp == 0 {
615 return Err(FdarError::InvalidParameter {
616 parameter: "ncomp",
617 message: "must be > 0".into(),
618 });
619 }
620
621 let mut rng = StdRng::seed_from_u64(seed);
622 let mut all_beta_t: Vec<Vec<f64>> = Vec::new();
623 let mut all_coefs: Vec<Vec<f64>> = Vec::new();
624 let mut all_metrics: Vec<f64> = Vec::new();
625 let mut all_abs_coefs: Vec<Vec<f64>> = Vec::new();
626
627 for _ in 0..n_boot {
628 let idx: Vec<usize> = (0..n).map(|_| rng.gen_range(0..n)).collect();
629 let boot_data = subsample_rows(data, &idx);
630 let boot_y: Vec<f64> = idx.iter().map(|&i| y[i]).collect();
631 let boot_sc = scalar_covariates.map(|sc| subsample_rows(sc, &idx));
632 if let Ok(refit) = fregre_lm(&boot_data, &boot_y, boot_sc.as_ref(), ncomp) {
633 all_beta_t.push(refit.beta_t.clone());
634 let coefs: Vec<f64> = (0..ncomp).map(|k| refit.coefficients[1 + k]).collect();
635 all_abs_coefs.push(coefs.iter().map(|c| c.abs()).collect());
636 all_coefs.push(coefs);
637 all_metrics.push(refit.r_squared);
638 }
639 }
640
641 build_stability_result(
642 &all_beta_t,
643 &all_coefs,
644 &all_abs_coefs,
645 &all_metrics,
646 m,
647 ncomp,
648 )
649 .ok_or_else(|| FdarError::ComputationFailed {
650 operation: "explanation_stability",
651 detail: "insufficient successful bootstrap refits; try increasing n_boot or check that the model fits reliably on subsampled data".into(),
652 })
653}
654
655#[must_use = "expensive computation whose result should not be discarded"]
665pub fn explanation_stability_logistic(
666 data: &FdMatrix,
667 y: &[f64],
668 scalar_covariates: Option<&FdMatrix>,
669 ncomp: usize,
670 n_boot: usize,
671 seed: u64,
672) -> Result<StabilityAnalysisResult, FdarError> {
673 let (n, m) = data.shape();
674 if n < 4 {
675 return Err(FdarError::InvalidDimension {
676 parameter: "data",
677 expected: ">=4 rows".into(),
678 actual: format!("{n}"),
679 });
680 }
681 if m == 0 {
682 return Err(FdarError::InvalidDimension {
683 parameter: "data",
684 expected: ">0 columns".into(),
685 actual: "0".into(),
686 });
687 }
688 if n != y.len() {
689 return Err(FdarError::InvalidDimension {
690 parameter: "y",
691 expected: format!("{n} (matching data rows)"),
692 actual: format!("{}", y.len()),
693 });
694 }
695 if n_boot < 2 {
696 return Err(FdarError::InvalidParameter {
697 parameter: "n_boot",
698 message: "must be >= 2".into(),
699 });
700 }
701 if ncomp == 0 {
702 return Err(FdarError::InvalidParameter {
703 parameter: "ncomp",
704 message: "must be > 0".into(),
705 });
706 }
707
708 let mut rng = StdRng::seed_from_u64(seed);
709 let (all_beta_t, all_coefs, all_abs_coefs, all_metrics) =
710 bootstrap_logistic_stability(data, y, scalar_covariates, n, ncomp, n_boot, &mut rng);
711
712 build_stability_result(
713 &all_beta_t,
714 &all_coefs,
715 &all_abs_coefs,
716 &all_metrics,
717 m,
718 ncomp,
719 )
720 .ok_or_else(|| FdarError::ComputationFailed {
721 operation: "explanation_stability_logistic",
722 detail: "insufficient successful bootstrap refits; try increasing n_boot or check that the model fits reliably on subsampled data".into(),
723 })
724}
725
/// One interval condition on a single FPCA score component, as produced by
/// `anchor_beam_search` in the helpers module.
#[derive(Debug, Clone, PartialEq)]
pub struct AnchorCondition {
    /// Index of the FPCA score component the bounds apply to.
    pub component: usize,
    /// Lower bound on the score (inclusivity is defined by the search).
    pub lower_bound: f64,
    /// Upper bound on the score.
    pub upper_bound: f64,
}
740
/// A conjunction of score conditions found by the anchor beam search
/// (`anchor_beam_search` in the helpers module computes the statistics).
#[derive(Debug, Clone, PartialEq)]
pub struct AnchorRule {
    /// Conditions that must all hold for an observation to match the rule.
    pub conditions: Vec<AnchorCondition>,
    /// Fraction of matching observations that share the anchor's prediction
    /// (per the `same_pred` predicate passed to the search).
    pub precision: f64,
    /// Fraction of all observations that match the rule.
    pub coverage: f64,
    /// Number of observations that match the rule.
    pub n_matching: usize,
}
753
/// Output of [`anchor_explanation`] / [`anchor_explanation_logistic`].
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub struct AnchorResult {
    /// The rule returned by the beam search.
    pub rule: AnchorRule,
    /// Row index of the explained observation.
    pub observation: usize,
    /// Fitted value (linear model) or predicted probability (logistic
    /// model) of the explained observation.
    pub predicted_value: f64,
}
765
766#[must_use = "expensive computation whose result should not be discarded"]
785pub fn anchor_explanation(
786 fit: &FregreLmResult,
787 data: &FdMatrix,
788 scalar_covariates: Option<&FdMatrix>,
789 observation: usize,
790 precision_threshold: f64,
791 n_bins: usize,
792) -> Result<AnchorResult, FdarError> {
793 let (n, m) = data.shape();
794 if n == 0 {
795 return Err(FdarError::InvalidDimension {
796 parameter: "data",
797 expected: ">0 rows".into(),
798 actual: "0".into(),
799 });
800 }
801 if m != fit.fpca.mean.len() {
802 return Err(FdarError::InvalidDimension {
803 parameter: "data",
804 expected: format!("{} columns", fit.fpca.mean.len()),
805 actual: format!("{m}"),
806 });
807 }
808 if observation >= n {
809 return Err(FdarError::InvalidParameter {
810 parameter: "observation",
811 message: format!("observation {observation} >= n {n}"),
812 });
813 }
814 if n_bins < 2 {
815 return Err(FdarError::InvalidParameter {
816 parameter: "n_bins",
817 message: "must be >= 2".into(),
818 });
819 }
820 let ncomp = fit.ncomp;
821 let scores = project_scores(data, &fit.fpca.mean, &fit.fpca.rotation, ncomp);
822 let obs_pred = fit.fitted_values[observation];
823 let tol = fit.residual_se;
824
825 let same_pred = |i: usize| -> bool {
827 let mut yhat = fit.coefficients[0];
828 for k in 0..ncomp {
829 yhat += fit.coefficients[1 + k] * scores[(i, k)];
830 }
831 if let Some(sc) = scalar_covariates {
832 for j in 0..fit.gamma.len() {
833 yhat += fit.gamma[j] * sc[(i, j)];
834 }
835 }
836 (yhat - obs_pred).abs() <= tol
837 };
838
839 let (rule, _) = anchor_beam_search(
840 &scores,
841 ncomp,
842 n,
843 observation,
844 precision_threshold,
845 n_bins,
846 &same_pred,
847 );
848
849 Ok(AnchorResult {
850 rule,
851 observation,
852 predicted_value: obs_pred,
853 })
854}
855
856#[must_use = "expensive computation whose result should not be discarded"]
866pub fn anchor_explanation_logistic(
867 fit: &FunctionalLogisticResult,
868 data: &FdMatrix,
869 scalar_covariates: Option<&FdMatrix>,
870 observation: usize,
871 precision_threshold: f64,
872 n_bins: usize,
873) -> Result<AnchorResult, FdarError> {
874 let (n, m) = data.shape();
875 if n == 0 {
876 return Err(FdarError::InvalidDimension {
877 parameter: "data",
878 expected: ">0 rows".into(),
879 actual: "0".into(),
880 });
881 }
882 if m != fit.fpca.mean.len() {
883 return Err(FdarError::InvalidDimension {
884 parameter: "data",
885 expected: format!("{} columns", fit.fpca.mean.len()),
886 actual: format!("{m}"),
887 });
888 }
889 if observation >= n {
890 return Err(FdarError::InvalidParameter {
891 parameter: "observation",
892 message: format!("observation {observation} >= n {n}"),
893 });
894 }
895 if n_bins < 2 {
896 return Err(FdarError::InvalidParameter {
897 parameter: "n_bins",
898 message: "must be >= 2".into(),
899 });
900 }
901 let ncomp = fit.ncomp;
902 let scores = project_scores(data, &fit.fpca.mean, &fit.fpca.rotation, ncomp);
903 let obs_class = fit.predicted_classes[observation];
904 let obs_prob = fit.probabilities[observation];
905 let p_scalar = fit.gamma.len();
906
907 let same_pred = |i: usize| -> bool {
909 let mut eta = fit.intercept;
910 for k in 0..ncomp {
911 eta += fit.coefficients[1 + k] * scores[(i, k)];
912 }
913 if let Some(sc) = scalar_covariates {
914 for j in 0..p_scalar {
915 eta += fit.gamma[j] * sc[(i, j)];
916 }
917 }
918 let pred_class = usize::from(sigmoid(eta) >= 0.5);
919 pred_class == obs_class
920 };
921
922 let (rule, _) = anchor_beam_search(
923 &scores,
924 ncomp,
925 n,
926 observation,
927 precision_threshold,
928 n_bins,
929 &same_pred,
930 );
931
932 Ok(AnchorResult {
933 rule,
934 observation,
935 predicted_value: obs_prob,
936 })
937}
938
/// Hosmer-Lemeshow statistic plus reliability-diagram data.
///
/// Sorts the `n` observations by predicted probability, splits them into
/// `n_groups` near-equal groups (the first `n % n_groups` groups get one
/// extra member), and accumulates `(O - E)^2 / (n_g * p̄ * (1 - p̄))` per
/// non-empty group. Groups whose variance term is ~0 contribute nothing.
///
/// Returns `(chi2, [(mean predicted, mean observed)], group sizes)`.
fn hosmer_lemeshow_computation(
    probabilities: &[f64],
    y: &[f64],
    n: usize,
    n_groups: usize,
) -> (f64, Vec<(f64, f64)>, Vec<usize>) {
    // Stable sort keeps tied probabilities in index order.
    let mut order: Vec<usize> = (0..n).collect();
    order.sort_by(|&a, &b| {
        probabilities[a]
            .partial_cmp(&probabilities[b])
            .unwrap_or(std::cmp::Ordering::Equal)
    });

    let base = n / n_groups;
    let extra = n % n_groups;

    let mut chi2 = 0.0;
    let mut bins = Vec::with_capacity(n_groups);
    let mut counts = Vec::with_capacity(n_groups);
    let mut cursor = 0;

    for g in 0..n_groups {
        let sz = base + usize::from(g < extra);
        let group = &order[cursor..cursor + sz];
        cursor += sz;
        if group.is_empty() {
            continue;
        }

        let ng = group.len() as f64;
        let observed: f64 = group.iter().map(|&i| y[i]).sum();
        let expected: f64 = group.iter().map(|&i| probabilities[i]).sum();
        let p_bar = expected / ng;

        bins.push((p_bar, observed / ng));
        counts.push(group.len());

        // Guard against a zero variance term (all-0 or all-1 predictions).
        let denom = ng * p_bar * (1.0 - p_bar);
        if denom > 1e-15 {
            chi2 += (observed - expected).powi(2) / denom;
        }
    }

    (chi2, bins, counts)
}
990
/// Expected and maximum calibration error over `n_bins` equal-width
/// probability bins, plus each bin's weighted contribution to the ECE.
///
/// The per-bin gap is |mean outcome - mean predicted probability|; the ECE
/// weights it by the bin's share of the `n` observations, the MCE takes the
/// largest unweighted gap. Empty bins contribute zero. Assumes
/// `n_bins >= 1` (the public caller validates this).
fn compute_equal_width_ece(
    probabilities: &[f64],
    y: &[f64],
    n: usize,
    n_bins: usize,
) -> (f64, f64, Vec<f64>) {
    // Per-bin accumulators: outcome sum, probability sum, member count.
    let mut sum_y = vec![0.0; n_bins];
    let mut sum_p = vec![0.0; n_bins];
    let mut count = vec![0usize; n_bins];

    for i in 0..n {
        // Equal-width binning on [0, 1]; p == 1.0 folds into the last bin.
        let b = ((probabilities[i] * n_bins as f64).floor() as usize).min(n_bins - 1);
        sum_y[b] += y[i];
        sum_p[b] += probabilities[i];
        count[b] += 1;
    }

    let mut ece = 0.0;
    let mut mce: f64 = 0.0;
    let mut contributions = vec![0.0; n_bins];

    for b in 0..n_bins {
        if count[b] == 0 {
            continue;
        }
        let members = count[b] as f64;
        let gap = (sum_y[b] / members - sum_p[b] / members).abs();
        let weighted = members / n as f64 * gap;
        contributions[b] = weighted;
        ece += weighted;
        mce = mce.max(gap);
    }

    (ece, mce, contributions)
}
1028
1029fn bootstrap_logistic_stability(
1031 data: &FdMatrix,
1032 y: &[f64],
1033 scalar_covariates: Option<&FdMatrix>,
1034 n: usize,
1035 ncomp: usize,
1036 n_boot: usize,
1037 rng: &mut StdRng,
1038) -> (Vec<Vec<f64>>, Vec<Vec<f64>>, Vec<Vec<f64>>, Vec<f64>) {
1039 let mut all_beta_t: Vec<Vec<f64>> = Vec::new();
1040 let mut all_coefs: Vec<Vec<f64>> = Vec::new();
1041 let mut all_abs_coefs: Vec<Vec<f64>> = Vec::new();
1042 let mut all_metrics: Vec<f64> = Vec::new();
1043
1044 for _ in 0..n_boot {
1045 let idx: Vec<usize> = (0..n).map(|_| rng.gen_range(0..n)).collect();
1046 let boot_data = subsample_rows(data, &idx);
1047 let boot_y: Vec<f64> = idx.iter().map(|&i| y[i]).collect();
1048 let boot_sc = scalar_covariates.map(|sc| subsample_rows(sc, &idx));
1049 let has_both = boot_y.iter().any(|&v| v < 0.5) && boot_y.iter().any(|&v| v >= 0.5);
1050 if !has_both {
1051 continue;
1052 }
1053 if let Ok(refit) =
1054 functional_logistic(&boot_data, &boot_y, boot_sc.as_ref(), ncomp, 25, 1e-6)
1055 {
1056 all_beta_t.push(refit.beta_t.clone());
1057 let coefs: Vec<f64> = (0..ncomp).map(|k| refit.coefficients[1 + k]).collect();
1058 all_abs_coefs.push(coefs.iter().map(|c| c.abs()).collect());
1059 all_coefs.push(coefs);
1060 all_metrics.push(refit.accuracy);
1061 }
1062 }
1063
1064 (all_beta_t, all_coefs, all_abs_coefs, all_metrics)
1065}
1066
1067fn bootstrap_logistic_coefs(
1069 data: &FdMatrix,
1070 y: &[f64],
1071 scalar_covariates: Option<&FdMatrix>,
1072 n: usize,
1073 ncomp: usize,
1074 n_boot: usize,
1075 rng: &mut StdRng,
1076) -> Vec<Vec<f64>> {
1077 let mut boot_coefs = Vec::new();
1078 for _ in 0..n_boot {
1079 let idx: Vec<usize> = (0..n).map(|_| rng.gen_range(0..n)).collect();
1080 let boot_data = subsample_rows(data, &idx);
1081 let boot_y: Vec<f64> = idx.iter().map(|&i| y[i]).collect();
1082 let boot_sc = scalar_covariates.map(|sc| subsample_rows(sc, &idx));
1083 let has_both = boot_y.iter().any(|&v| v < 0.5) && boot_y.iter().any(|&v| v >= 0.5);
1084 if !has_both {
1085 continue;
1086 }
1087 if let Ok(refit) =
1088 functional_logistic(&boot_data, &boot_y, boot_sc.as_ref(), ncomp, 25, 1e-6)
1089 {
1090 boot_coefs.push((0..ncomp).map(|k| refit.coefficients[1 + k]).collect());
1091 }
1092 }
1093 boot_coefs
1094}