1use super::helpers::*;
4use crate::error::FdarError;
5use crate::matrix::FdMatrix;
6use crate::scalar_on_function::{
7 fregre_lm, functional_logistic, sigmoid, FregreLmResult, FunctionalLogisticResult,
8};
9use rand::prelude::*;
10
/// Calibration diagnostics for a fitted functional logistic model.
///
/// Produced by [`calibration_diagnostics`]; bundles scalar calibration
/// metrics with the per-group reliability data used to compute them.
#[derive(Debug, Clone, PartialEq)]
pub struct CalibrationDiagnosticsResult {
    // Mean squared difference between predicted probabilities and outcomes.
    pub brier_score: f64,
    // Mean negative log-likelihood (probabilities clamped away from 0 and 1).
    pub log_loss: f64,
    // Hosmer-Lemeshow chi-squared statistic over probability-sorted groups.
    pub hosmer_lemeshow_chi2: f64,
    // Degrees of freedom for the test: non-empty groups minus 2, floored at 1.
    pub hosmer_lemeshow_df: usize,
    // Number of non-empty groups actually formed (may be below the request).
    pub n_groups: usize,
    // Per-group (mean predicted probability, mean observed outcome) pairs.
    pub reliability_bins: Vec<(f64, f64)>,
    // Number of observations in each non-empty group.
    pub bin_counts: Vec<usize>,
}
33
34#[must_use = "expensive computation whose result should not be discarded"]
42pub fn calibration_diagnostics(
43 fit: &FunctionalLogisticResult,
44 y: &[f64],
45 n_groups: usize,
46) -> Result<CalibrationDiagnosticsResult, FdarError> {
47 let n = fit.probabilities.len();
48 if n == 0 {
49 return Err(FdarError::InvalidDimension {
50 parameter: "probabilities",
51 expected: ">0 length".into(),
52 actual: "0".into(),
53 });
54 }
55 if n != y.len() {
56 return Err(FdarError::InvalidDimension {
57 parameter: "y",
58 expected: format!("{n} (matching probabilities)"),
59 actual: format!("{}", y.len()),
60 });
61 }
62 if n_groups < 2 {
63 return Err(FdarError::InvalidParameter {
64 parameter: "n_groups",
65 message: "must be >= 2".into(),
66 });
67 }
68
69 let brier_score: f64 = fit
71 .probabilities
72 .iter()
73 .zip(y)
74 .map(|(&p, &yi)| (p - yi).powi(2))
75 .sum::<f64>()
76 / n as f64;
77
78 let log_loss: f64 = -fit
80 .probabilities
81 .iter()
82 .zip(y)
83 .map(|(&p, &yi)| {
84 let p_clip = p.clamp(1e-15, 1.0 - 1e-15);
85 yi * p_clip.ln() + (1.0 - yi) * (1.0 - p_clip).ln()
86 })
87 .sum::<f64>()
88 / n as f64;
89
90 let (hosmer_lemeshow_chi2, reliability_bins, bin_counts) =
91 hosmer_lemeshow_computation(&fit.probabilities, y, n, n_groups);
92
93 let actual_groups = bin_counts.len();
94 let hosmer_lemeshow_df = if actual_groups > 2 {
95 actual_groups - 2
96 } else {
97 1
98 };
99
100 Ok(CalibrationDiagnosticsResult {
101 brier_score,
102 log_loss,
103 hosmer_lemeshow_chi2,
104 hosmer_lemeshow_df,
105 n_groups: actual_groups,
106 reliability_bins,
107 bin_counts,
108 })
109}
110
/// Calibration-error summary produced by [`expected_calibration_error`].
#[derive(Debug, Clone, PartialEq)]
pub struct EceResult {
    // Expected calibration error: bin-count-weighted mean |obs - pred| gap
    // over equal-width probability bins.
    pub ece: f64,
    // Maximum calibration error: the largest per-bin gap.
    pub mce: f64,
    // Adaptive calibration error over equal-count (probability-sorted) bins.
    pub ace: f64,
    // Number of bins requested by the caller.
    pub n_bins: usize,
    // Weighted contribution of each equal-width bin to the ECE
    // (zero for empty bins).
    pub bin_ece_contributions: Vec<f64>,
}
129
130#[must_use = "expensive computation whose result should not be discarded"]
143pub fn expected_calibration_error(
144 fit: &FunctionalLogisticResult,
145 y: &[f64],
146 n_bins: usize,
147) -> Result<EceResult, FdarError> {
148 let n = fit.probabilities.len();
149 if n == 0 {
150 return Err(FdarError::InvalidDimension {
151 parameter: "probabilities",
152 expected: ">0 length".into(),
153 actual: "0".into(),
154 });
155 }
156 if n != y.len() {
157 return Err(FdarError::InvalidDimension {
158 parameter: "y",
159 expected: format!("{n} (matching probabilities)"),
160 actual: format!("{}", y.len()),
161 });
162 }
163 if n_bins == 0 {
164 return Err(FdarError::InvalidParameter {
165 parameter: "n_bins",
166 message: "must be > 0".into(),
167 });
168 }
169
170 let (ece, mce, bin_ece_contributions) =
171 compute_equal_width_ece(&fit.probabilities, y, n, n_bins);
172
173 let mut sorted_idx: Vec<usize> = (0..n).collect();
175 sorted_idx.sort_by(|&a, &b| {
176 fit.probabilities[a]
177 .partial_cmp(&fit.probabilities[b])
178 .unwrap_or(std::cmp::Ordering::Equal)
179 });
180 let group_size = n / n_bins.max(1);
181 let mut ace = 0.0;
182 let mut start = 0;
183 for g in 0..n_bins {
184 if start >= n {
185 break;
186 }
187 let end = if g < n_bins - 1 {
188 (start + group_size).min(n)
189 } else {
190 n
191 };
192 ace += calibration_gap_weighted(&sorted_idx[start..end], y, &fit.probabilities, n);
193 start = end;
194 }
195
196 Ok(EceResult {
197 ece,
198 mce,
199 ace,
200 n_bins,
201 bin_ece_contributions,
202 })
203}
204
/// Split-conformal prediction intervals produced by
/// [`conformal_prediction_residuals`].
#[derive(Debug, Clone, PartialEq)]
pub struct ConformalPredictionResult {
    // Point predictions for the test observations.
    pub predictions: Vec<f64>,
    // Lower interval bounds: prediction - residual_quantile.
    pub lower: Vec<f64>,
    // Upper interval bounds: prediction + residual_quantile.
    pub upper: Vec<f64>,
    // Conformal quantile of the calibration absolute residuals.
    pub residual_quantile: f64,
    // Coverage value reported by the conformal quantile computation
    // (presumably empirical coverage on the calibration split — computed
    // in `conformal_quantile_and_coverage`).
    pub coverage: f64,
    // Absolute residuals |y - prediction| on the calibration split.
    pub calibration_scores: Vec<f64>,
}
225
/// Split-conformal prediction intervals for a functional linear model.
///
/// Randomly splits the training set into a "proper" training part and a
/// calibration part (sizes determined by `cal_fraction`), refits the model
/// on the proper part, computes absolute residuals on the calibration part,
/// and widens the test predictions by the conformal residual quantile at
/// miscoverage level `alpha`. `seed` makes the split reproducible.
///
/// # Errors
///
/// Returns an error when the input dimensions/parameters fail
/// `validate_conformal_inputs`, or when the refit on the proper training
/// split fails.
#[must_use = "expensive computation whose result should not be discarded"]
pub fn conformal_prediction_residuals(
    fit: &FregreLmResult,
    train_data: &FdMatrix,
    train_y: &[f64],
    test_data: &FdMatrix,
    scalar_covariates_train: Option<&FdMatrix>,
    scalar_covariates_test: Option<&FdMatrix>,
    cal_fraction: f64,
    alpha: f64,
    seed: u64,
) -> Result<ConformalPredictionResult, FdarError> {
    let (n, m) = train_data.shape();
    let (n_test, m_test) = test_data.shape();
    let ncomp = fit.ncomp;
    // All dimension/parameter checks are delegated to the shared validator.
    let (_n_cal, n_proper) = validate_conformal_inputs(
        n,
        m,
        n_test,
        m_test,
        train_y.len(),
        ncomp,
        cal_fraction,
        alpha,
    )
    .ok_or_else(|| FdarError::InvalidParameter {
        parameter: "conformal_prediction_residuals",
        message: "invalid input dimensions or parameters".into(),
    })?;

    // Deterministic random split: first n_proper shuffled indices train the
    // refit, the rest form the calibration set.
    let mut rng = StdRng::seed_from_u64(seed);
    let mut all_idx: Vec<usize> = (0..n).collect();
    all_idx.shuffle(&mut rng);
    let proper_idx = &all_idx[..n_proper];
    let cal_idx = &all_idx[n_proper..];

    let proper_data = subsample_rows(train_data, proper_idx);
    let proper_y: Vec<f64> = proper_idx.iter().map(|&i| train_y[i]).collect();
    let proper_sc = scalar_covariates_train.map(|sc| subsample_rows(sc, proper_idx));

    // Refit on the proper training split only; calibration data stays unseen.
    let refit = fregre_lm(&proper_data, &proper_y, proper_sc.as_ref(), ncomp)?;

    // Predictions on the calibration split, using the refit's FPCA basis.
    let cal_data = subsample_rows(train_data, cal_idx);
    let cal_sc = scalar_covariates_train.map(|sc| subsample_rows(sc, cal_idx));
    let cal_scores = project_scores(&cal_data, &refit.fpca.mean, &refit.fpca.rotation, ncomp);
    let cal_preds = predict_from_scores(
        &cal_scores,
        &refit.coefficients,
        &refit.gamma,
        cal_sc.as_ref(),
        ncomp,
    );
    let cal_n = cal_idx.len();

    // Nonconformity scores: absolute calibration residuals.
    let calibration_scores: Vec<f64> = cal_idx
        .iter()
        .enumerate()
        .map(|(i, &orig)| (train_y[orig] - cal_preds[i]).abs())
        .collect();

    let (residual_quantile, coverage) =
        conformal_quantile_and_coverage(&calibration_scores, cal_n, alpha);

    // Point predictions on the test set, again via the refit model.
    let test_scores = project_scores(test_data, &refit.fpca.mean, &refit.fpca.rotation, ncomp);
    let predictions = predict_from_scores(
        &test_scores,
        &refit.coefficients,
        &refit.gamma,
        scalar_covariates_test,
        ncomp,
    );

    // Symmetric intervals: prediction +/- residual quantile.
    let lower: Vec<f64> = predictions.iter().map(|&p| p - residual_quantile).collect();
    let upper: Vec<f64> = predictions.iter().map(|&p| p + residual_quantile).collect();

    Ok(ConformalPredictionResult {
        predictions,
        lower,
        upper,
        residual_quantile,
        coverage,
        calibration_scores,
    })
}
338
/// Depth notion used when ranking score vectors and bootstrap coefficients.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DepthType {
    // Fraiman-Muniz depth.
    FraimanMuniz,
    // Modified band depth.
    ModifiedBand,
    // Functional spatial depth.
    FunctionalSpatial,
}
350
/// Depth-based diagnostics for a fitted scalar-on-function regression.
#[derive(Debug, Clone, PartialEq)]
pub struct RegressionDepthResult {
    // Depth of the fitted coefficient vector among bootstrap refits.
    pub beta_depth: f64,
    // Depth of each observation's FPCA score vector.
    pub score_depths: Vec<f64>,
    // Arithmetic mean of `score_depths`.
    pub mean_score_depth: f64,
    // Depth notion used for all depth computations.
    pub depth_type: DepthType,
    // Number of bootstrap refits that succeeded (others are skipped).
    pub n_boot_success: usize,
}
365
/// Bootstrap-based regression depth for a fitted functional linear model.
///
/// Projects `data` onto the fitted FPCA basis, computes a depth value for
/// each observation's score vector, and measures how central the fitted
/// coefficient vector is among `n_boot` bootstrap refits. `seed` makes the
/// bootstrap reproducible.
///
/// # Errors
///
/// Returns an error when `data` has fewer than 4 rows or no columns, when
/// `y` does not match the number of rows, when `n_boot` is zero, or when
/// the score-depth computation yields no values.
#[must_use = "expensive computation whose result should not be discarded"]
pub fn regression_depth(
    fit: &FregreLmResult,
    data: &FdMatrix,
    y: &[f64],
    scalar_covariates: Option<&FdMatrix>,
    n_boot: usize,
    depth_type: DepthType,
    seed: u64,
) -> Result<RegressionDepthResult, FdarError> {
    let (n, m) = data.shape();
    if n < 4 {
        return Err(FdarError::InvalidDimension {
            parameter: "data",
            expected: ">=4 rows".into(),
            actual: format!("{n}"),
        });
    }
    if m == 0 {
        return Err(FdarError::InvalidDimension {
            parameter: "data",
            expected: ">0 columns".into(),
            actual: "0".into(),
        });
    }
    if n != y.len() {
        return Err(FdarError::InvalidDimension {
            parameter: "y",
            expected: format!("{n} (matching data rows)"),
            actual: format!("{}", y.len()),
        });
    }
    if n_boot == 0 {
        return Err(FdarError::InvalidParameter {
            parameter: "n_boot",
            message: "must be > 0".into(),
        });
    }
    // Depth of each observation in FPCA score space.
    let ncomp = fit.ncomp;
    let scores = project_scores(data, &fit.fpca.mean, &fit.fpca.rotation, ncomp);
    let score_depths = compute_score_depths(&scores, depth_type);
    if score_depths.is_empty() {
        return Err(FdarError::ComputationFailed {
            operation: "regression_depth",
            detail: "score depth computation returned empty".into(),
        });
    }
    let mean_score_depth = score_depths.iter().sum::<f64>() / score_depths.len() as f64;

    // Fitted FPCA regression coefficients (skipping the intercept at index 0).
    let orig_coefs: Vec<f64> = (0..ncomp).map(|k| fit.coefficients[1 + k]).collect();
    let mut rng = StdRng::seed_from_u64(seed);
    let mut boot_coefs = Vec::new();
    for _ in 0..n_boot {
        // Resample rows with replacement; failed refits are simply skipped.
        let idx: Vec<usize> = (0..n).map(|_| rng.gen_range(0..n)).collect();
        let boot_data = subsample_rows(data, &idx);
        let boot_y: Vec<f64> = idx.iter().map(|&i| y[i]).collect();
        let boot_sc = scalar_covariates.map(|sc| subsample_rows(sc, &idx));
        if let Ok(refit) = fregre_lm(&boot_data, &boot_y, boot_sc.as_ref(), ncomp) {
            boot_coefs.push((0..ncomp).map(|k| refit.coefficients[1 + k]).collect());
        }
    }

    let beta_depth = beta_depth_from_bootstrap(&boot_coefs, &orig_coefs, ncomp, depth_type);

    Ok(RegressionDepthResult {
        beta_depth,
        score_depths,
        mean_score_depth,
        depth_type,
        n_boot_success: boot_coefs.len(),
    })
}
459
460#[must_use = "expensive computation whose result should not be discarded"]
470pub fn regression_depth_logistic(
471 fit: &FunctionalLogisticResult,
472 data: &FdMatrix,
473 y: &[f64],
474 scalar_covariates: Option<&FdMatrix>,
475 n_boot: usize,
476 depth_type: DepthType,
477 seed: u64,
478) -> Result<RegressionDepthResult, FdarError> {
479 let (n, m) = data.shape();
480 if n < 4 {
481 return Err(FdarError::InvalidDimension {
482 parameter: "data",
483 expected: ">=4 rows".into(),
484 actual: format!("{n}"),
485 });
486 }
487 if m == 0 {
488 return Err(FdarError::InvalidDimension {
489 parameter: "data",
490 expected: ">0 columns".into(),
491 actual: "0".into(),
492 });
493 }
494 if n != y.len() {
495 return Err(FdarError::InvalidDimension {
496 parameter: "y",
497 expected: format!("{n} (matching data rows)"),
498 actual: format!("{}", y.len()),
499 });
500 }
501 if n_boot == 0 {
502 return Err(FdarError::InvalidParameter {
503 parameter: "n_boot",
504 message: "must be > 0".into(),
505 });
506 }
507 let ncomp = fit.ncomp;
508 let scores = project_scores(data, &fit.fpca.mean, &fit.fpca.rotation, ncomp);
509 let score_depths = compute_score_depths(&scores, depth_type);
510 if score_depths.is_empty() {
511 return Err(FdarError::ComputationFailed {
512 operation: "regression_depth_logistic",
513 detail: "score depth computation returned empty".into(),
514 });
515 }
516 let mean_score_depth = score_depths.iter().sum::<f64>() / score_depths.len() as f64;
517
518 let orig_coefs: Vec<f64> = (0..ncomp).map(|k| fit.coefficients[1 + k]).collect();
519 let mut rng = StdRng::seed_from_u64(seed);
520 let boot_coefs =
521 bootstrap_logistic_coefs(data, y, scalar_covariates, n, ncomp, n_boot, &mut rng);
522
523 let beta_depth = beta_depth_from_bootstrap(&boot_coefs, &orig_coefs, ncomp, depth_type);
524
525 Ok(RegressionDepthResult {
526 beta_depth,
527 score_depths,
528 mean_score_depth,
529 depth_type,
530 n_boot_success: boot_coefs.len(),
531 })
532}
533
/// Bootstrap stability summary produced by [`explanation_stability`] and
/// [`explanation_stability_logistic`].
#[derive(Debug, Clone, PartialEq)]
pub struct StabilityAnalysisResult {
    // Pointwise standard deviation of the functional coefficient beta(t)
    // across bootstrap refits.
    pub beta_t_std: Vec<f64>,
    // Standard deviation of each FPCA regression coefficient across refits.
    pub coefficient_std: Vec<f64>,
    // Standard deviation of the fit metric across refits (R^2 for the linear
    // model, accuracy for the logistic model).
    pub metric_std: f64,
    // Pointwise coefficient of variation of beta(t)
    // (computed in `build_stability_result`).
    pub beta_t_cv: Vec<f64>,
    // Stability of the component-importance ordering across refits
    // (derived from |coefficients| in `build_stability_result`).
    pub importance_stability: f64,
    // Number of bootstrap refits that succeeded.
    pub n_boot_success: usize,
}
554
/// Bootstrap stability analysis of functional linear model explanations.
///
/// Refits the model on `n_boot` bootstrap resamples of (`data`, `y`) and
/// summarises how much the functional coefficient beta(t), the FPCA
/// coefficients, their absolute values, and the R^2 metric vary across the
/// successful refits. `seed` makes the bootstrap reproducible.
///
/// # Errors
///
/// Returns an error when `data` has fewer than 4 rows or no columns, when
/// `y` does not match the number of rows, when `n_boot < 2` or `ncomp == 0`,
/// or when too few bootstrap refits succeed to form a summary.
#[must_use = "expensive computation whose result should not be discarded"]
pub fn explanation_stability(
    data: &FdMatrix,
    y: &[f64],
    scalar_covariates: Option<&FdMatrix>,
    ncomp: usize,
    n_boot: usize,
    seed: u64,
) -> Result<StabilityAnalysisResult, FdarError> {
    let (n, m) = data.shape();
    if n < 4 {
        return Err(FdarError::InvalidDimension {
            parameter: "data",
            expected: ">=4 rows".into(),
            actual: format!("{n}"),
        });
    }
    if m == 0 {
        return Err(FdarError::InvalidDimension {
            parameter: "data",
            expected: ">0 columns".into(),
            actual: "0".into(),
        });
    }
    if n != y.len() {
        return Err(FdarError::InvalidDimension {
            parameter: "y",
            expected: format!("{n} (matching data rows)"),
            actual: format!("{}", y.len()),
        });
    }
    if n_boot < 2 {
        return Err(FdarError::InvalidParameter {
            parameter: "n_boot",
            message: "must be >= 2".into(),
        });
    }
    if ncomp == 0 {
        return Err(FdarError::InvalidParameter {
            parameter: "ncomp",
            message: "must be > 0".into(),
        });
    }

    let mut rng = StdRng::seed_from_u64(seed);
    let mut all_beta_t: Vec<Vec<f64>> = Vec::new();
    let mut all_coefs: Vec<Vec<f64>> = Vec::new();
    let mut all_metrics: Vec<f64> = Vec::new();
    let mut all_abs_coefs: Vec<Vec<f64>> = Vec::new();

    for _ in 0..n_boot {
        // Resample rows with replacement; failed refits are simply skipped.
        let idx: Vec<usize> = (0..n).map(|_| rng.gen_range(0..n)).collect();
        let boot_data = subsample_rows(data, &idx);
        let boot_y: Vec<f64> = idx.iter().map(|&i| y[i]).collect();
        let boot_sc = scalar_covariates.map(|sc| subsample_rows(sc, &idx));
        if let Ok(refit) = fregre_lm(&boot_data, &boot_y, boot_sc.as_ref(), ncomp) {
            all_beta_t.push(refit.beta_t.clone());
            // Intercept at index 0 is excluded from the stability summary.
            let coefs: Vec<f64> = (0..ncomp).map(|k| refit.coefficients[1 + k]).collect();
            all_abs_coefs.push(coefs.iter().map(|c| c.abs()).collect());
            all_coefs.push(coefs);
            all_metrics.push(refit.r_squared);
        }
    }

    build_stability_result(
        &all_beta_t,
        &all_coefs,
        &all_abs_coefs,
        &all_metrics,
        m,
        ncomp,
    )
    .ok_or_else(|| FdarError::ComputationFailed {
        operation: "explanation_stability",
        detail: "insufficient successful bootstrap refits".into(),
    })
}
644
645#[must_use = "expensive computation whose result should not be discarded"]
655pub fn explanation_stability_logistic(
656 data: &FdMatrix,
657 y: &[f64],
658 scalar_covariates: Option<&FdMatrix>,
659 ncomp: usize,
660 n_boot: usize,
661 seed: u64,
662) -> Result<StabilityAnalysisResult, FdarError> {
663 let (n, m) = data.shape();
664 if n < 4 {
665 return Err(FdarError::InvalidDimension {
666 parameter: "data",
667 expected: ">=4 rows".into(),
668 actual: format!("{n}"),
669 });
670 }
671 if m == 0 {
672 return Err(FdarError::InvalidDimension {
673 parameter: "data",
674 expected: ">0 columns".into(),
675 actual: "0".into(),
676 });
677 }
678 if n != y.len() {
679 return Err(FdarError::InvalidDimension {
680 parameter: "y",
681 expected: format!("{n} (matching data rows)"),
682 actual: format!("{}", y.len()),
683 });
684 }
685 if n_boot < 2 {
686 return Err(FdarError::InvalidParameter {
687 parameter: "n_boot",
688 message: "must be >= 2".into(),
689 });
690 }
691 if ncomp == 0 {
692 return Err(FdarError::InvalidParameter {
693 parameter: "ncomp",
694 message: "must be > 0".into(),
695 });
696 }
697
698 let mut rng = StdRng::seed_from_u64(seed);
699 let (all_beta_t, all_coefs, all_abs_coefs, all_metrics) =
700 bootstrap_logistic_stability(data, y, scalar_covariates, n, ncomp, n_boot, &mut rng);
701
702 build_stability_result(
703 &all_beta_t,
704 &all_coefs,
705 &all_abs_coefs,
706 &all_metrics,
707 m,
708 ncomp,
709 )
710 .ok_or_else(|| FdarError::ComputationFailed {
711 operation: "explanation_stability_logistic",
712 detail: "insufficient successful bootstrap refits".into(),
713 })
714}
715
/// Single interval condition on one FPCA score component.
#[derive(Debug, Clone, PartialEq)]
pub struct AnchorCondition {
    // Index of the FPCA component the condition applies to.
    pub component: usize,
    // Lower bound of the score interval.
    pub lower_bound: f64,
    // Upper bound of the score interval.
    pub upper_bound: f64,
}
730
/// Anchor rule: a conjunction of score-interval conditions.
#[derive(Debug, Clone, PartialEq)]
pub struct AnchorRule {
    // Conditions that must all hold for an observation to match the rule.
    pub conditions: Vec<AnchorCondition>,
    // Precision of the rule as reported by the beam search (presumably the
    // fraction of matching observations sharing the anchored prediction).
    pub precision: f64,
    // Coverage of the rule as reported by the beam search (presumably the
    // fraction of all observations matching the rule).
    pub coverage: f64,
    // Number of observations matching the rule.
    pub n_matching: usize,
}
743
/// Anchor explanation for a single observation.
#[derive(Debug, Clone, PartialEq)]
pub struct AnchorResult {
    // The rule found by the beam search for this observation.
    pub rule: AnchorRule,
    // Index of the explained observation.
    pub observation: usize,
    // Model prediction for the observation: the fitted value for the linear
    // model, or the predicted probability for the logistic variant.
    pub predicted_value: f64,
}
754
/// Anchor-style rule explanation for one observation of a functional linear
/// model.
///
/// Searches (via beam search over FPCA score intervals, discretised into
/// `n_bins` bins) for a rule such that observations satisfying it receive a
/// prediction within one residual standard error of the explained
/// observation's prediction, with precision at least `precision_threshold`.
///
/// # Errors
///
/// Returns an error when `data` is empty or its column count does not match
/// the fitted FPCA mean, when `observation` is out of range, or when
/// `n_bins < 2`.
#[must_use = "expensive computation whose result should not be discarded"]
pub fn anchor_explanation(
    fit: &FregreLmResult,
    data: &FdMatrix,
    scalar_covariates: Option<&FdMatrix>,
    observation: usize,
    precision_threshold: f64,
    n_bins: usize,
) -> Result<AnchorResult, FdarError> {
    let (n, m) = data.shape();
    if n == 0 {
        return Err(FdarError::InvalidDimension {
            parameter: "data",
            expected: ">0 rows".into(),
            actual: "0".into(),
        });
    }
    if m != fit.fpca.mean.len() {
        return Err(FdarError::InvalidDimension {
            parameter: "data",
            expected: format!("{} columns", fit.fpca.mean.len()),
            actual: format!("{m}"),
        });
    }
    if observation >= n {
        return Err(FdarError::InvalidParameter {
            parameter: "observation",
            message: format!("observation {observation} >= n {n}"),
        });
    }
    if n_bins < 2 {
        return Err(FdarError::InvalidParameter {
            parameter: "n_bins",
            message: "must be >= 2".into(),
        });
    }
    let ncomp = fit.ncomp;
    let scores = project_scores(data, &fit.fpca.mean, &fit.fpca.rotation, ncomp);
    let obs_pred = fit.fitted_values[observation];
    // Predictions within one residual SE of the anchored prediction count as
    // "the same" for rule precision purposes.
    let tol = fit.residual_se;

    // Recompute the linear prediction for row i from intercept, FPCA scores
    // and (optionally) scalar covariates, then compare against the anchor.
    let same_pred = |i: usize| -> bool {
        let mut yhat = fit.coefficients[0];
        for k in 0..ncomp {
            yhat += fit.coefficients[1 + k] * scores[(i, k)];
        }
        if let Some(sc) = scalar_covariates {
            for j in 0..fit.gamma.len() {
                yhat += fit.gamma[j] * sc[(i, j)];
            }
        }
        (yhat - obs_pred).abs() <= tol
    };

    let (rule, _) = anchor_beam_search(
        &scores,
        ncomp,
        n,
        observation,
        precision_threshold,
        n_bins,
        &same_pred,
    );

    Ok(AnchorResult {
        rule,
        observation,
        predicted_value: obs_pred,
    })
}
844
845#[must_use = "expensive computation whose result should not be discarded"]
855pub fn anchor_explanation_logistic(
856 fit: &FunctionalLogisticResult,
857 data: &FdMatrix,
858 scalar_covariates: Option<&FdMatrix>,
859 observation: usize,
860 precision_threshold: f64,
861 n_bins: usize,
862) -> Result<AnchorResult, FdarError> {
863 let (n, m) = data.shape();
864 if n == 0 {
865 return Err(FdarError::InvalidDimension {
866 parameter: "data",
867 expected: ">0 rows".into(),
868 actual: "0".into(),
869 });
870 }
871 if m != fit.fpca.mean.len() {
872 return Err(FdarError::InvalidDimension {
873 parameter: "data",
874 expected: format!("{} columns", fit.fpca.mean.len()),
875 actual: format!("{m}"),
876 });
877 }
878 if observation >= n {
879 return Err(FdarError::InvalidParameter {
880 parameter: "observation",
881 message: format!("observation {observation} >= n {n}"),
882 });
883 }
884 if n_bins < 2 {
885 return Err(FdarError::InvalidParameter {
886 parameter: "n_bins",
887 message: "must be >= 2".into(),
888 });
889 }
890 let ncomp = fit.ncomp;
891 let scores = project_scores(data, &fit.fpca.mean, &fit.fpca.rotation, ncomp);
892 let obs_class = fit.predicted_classes[observation];
893 let obs_prob = fit.probabilities[observation];
894 let p_scalar = fit.gamma.len();
895
896 let same_pred = |i: usize| -> bool {
898 let mut eta = fit.intercept;
899 for k in 0..ncomp {
900 eta += fit.coefficients[1 + k] * scores[(i, k)];
901 }
902 if let Some(sc) = scalar_covariates {
903 for j in 0..p_scalar {
904 eta += fit.gamma[j] * sc[(i, j)];
905 }
906 }
907 let pred_class = if sigmoid(eta) >= 0.5 { 1usize } else { 0usize };
908 pred_class == obs_class
909 };
910
911 let (rule, _) = anchor_beam_search(
912 &scores,
913 ncomp,
914 n,
915 observation,
916 precision_threshold,
917 n_bins,
918 &same_pred,
919 );
920
921 Ok(AnchorResult {
922 rule,
923 observation,
924 predicted_value: obs_prob,
925 })
926}
927
/// Hosmer-Lemeshow chi-squared statistic over `n_groups` probability-sorted
/// groups, returning `(chi2, reliability_bins, bin_counts)` where each
/// reliability bin is `(mean predicted probability, mean observed outcome)`.
///
/// Groups are as equal-sized as possible: the first `n % n_groups` groups
/// receive one extra observation. Empty groups (possible when
/// `n < n_groups`) are skipped, as are groups whose variance term is
/// numerically zero.
fn hosmer_lemeshow_computation(
    probabilities: &[f64],
    y: &[f64],
    n: usize,
    n_groups: usize,
) -> (f64, Vec<(f64, f64)>, Vec<usize>) {
    // Order observations by predicted probability (NaNs compare as equal).
    let mut order: Vec<usize> = (0..n).collect();
    order.sort_by(|&a, &b| {
        probabilities[a]
            .partial_cmp(&probabilities[b])
            .unwrap_or(std::cmp::Ordering::Equal)
    });

    let base = n / n_groups;
    let extra = n % n_groups;

    let mut chi2 = 0.0;
    let mut reliability_bins = Vec::with_capacity(n_groups);
    let mut bin_counts = Vec::with_capacity(n_groups);

    let mut lo = 0;
    for g in 0..n_groups {
        let sz = base + usize::from(g < extra);
        let members = &order[lo..lo + sz];
        lo += sz;
        if members.is_empty() {
            continue;
        }

        let ng = members.len();
        let observed: f64 = members.iter().map(|&i| y[i]).sum();
        let expected: f64 = members.iter().map(|&i| probabilities[i]).sum();
        let p_bar = expected / ng as f64;

        reliability_bins.push((p_bar, observed / ng as f64));
        bin_counts.push(ng);

        // Skip groups where the binomial variance term is numerically zero.
        let denom = ng as f64 * p_bar * (1.0 - p_bar);
        if denom > 1e-15 {
            chi2 += (observed - expected).powi(2) / denom;
        }
    }

    (chi2, reliability_bins, bin_counts)
}
979
/// Expected and maximum calibration error over `n_bins` equal-width
/// probability bins, returning `(ece, mce, per-bin ECE contributions)`.
///
/// Each observation falls into bin `floor(p * n_bins)` (clamped to the last
/// bin, so `p == 1.0` is included). Empty bins contribute zero.
fn compute_equal_width_ece(
    probabilities: &[f64],
    y: &[f64],
    n: usize,
    n_bins: usize,
) -> (f64, f64, Vec<f64>) {
    let mut sum_y = vec![0.0; n_bins];
    let mut sum_p = vec![0.0; n_bins];
    let mut counts = vec![0usize; n_bins];

    // Slicing keeps the original's panic-on-short-input behavior.
    let probs = &probabilities[..n];
    let outcomes = &y[..n];
    for (&p, &yi) in probs.iter().zip(outcomes) {
        let b = ((p * n_bins as f64).floor() as usize).min(n_bins - 1);
        sum_y[b] += yi;
        sum_p[b] += p;
        counts[b] += 1;
    }

    let mut ece = 0.0;
    let mut mce = 0.0_f64;
    let mut contributions = vec![0.0; n_bins];

    for b in 0..n_bins {
        if counts[b] == 0 {
            continue;
        }
        let cnt = counts[b] as f64;
        // Gap between mean observed outcome and mean predicted probability.
        let gap = (sum_y[b] / cnt - sum_p[b] / cnt).abs();
        let contrib = cnt / n as f64 * gap;
        contributions[b] = contrib;
        ece += contrib;
        mce = mce.max(gap);
    }

    (ece, mce, contributions)
}
1017
/// Refit the functional logistic model on `n_boot` bootstrap resamples,
/// collecting beta(t) curves, FPCA coefficients, |coefficients| and accuracy
/// from each successful refit.
///
/// Returns `(beta_t curves, coefficients, abs coefficients, accuracies)`,
/// one entry per successful refit. Resamples that contain only one class
/// are skipped before fitting, and refits that return an error are skipped
/// silently.
fn bootstrap_logistic_stability(
    data: &FdMatrix,
    y: &[f64],
    scalar_covariates: Option<&FdMatrix>,
    n: usize,
    ncomp: usize,
    n_boot: usize,
    rng: &mut StdRng,
) -> (Vec<Vec<f64>>, Vec<Vec<f64>>, Vec<Vec<f64>>, Vec<f64>) {
    let mut all_beta_t: Vec<Vec<f64>> = Vec::new();
    let mut all_coefs: Vec<Vec<f64>> = Vec::new();
    let mut all_abs_coefs: Vec<Vec<f64>> = Vec::new();
    let mut all_metrics: Vec<f64> = Vec::new();

    for _ in 0..n_boot {
        // Resample rows with replacement.
        let idx: Vec<usize> = (0..n).map(|_| rng.gen_range(0..n)).collect();
        let boot_data = subsample_rows(data, &idx);
        let boot_y: Vec<f64> = idx.iter().map(|&i| y[i]).collect();
        let boot_sc = scalar_covariates.map(|sc| subsample_rows(sc, &idx));
        // A logistic fit needs both classes present in the resample
        // (labels below/above 0.5 stand in for class 0/1).
        let has_both = boot_y.iter().any(|&v| v < 0.5) && boot_y.iter().any(|&v| v >= 0.5);
        if !has_both {
            continue;
        }
        if let Ok(refit) =
            functional_logistic(&boot_data, &boot_y, boot_sc.as_ref(), ncomp, 25, 1e-6)
        {
            all_beta_t.push(refit.beta_t.clone());
            // Intercept at index 0 is excluded from the stability summary.
            let coefs: Vec<f64> = (0..ncomp).map(|k| refit.coefficients[1 + k]).collect();
            all_abs_coefs.push(coefs.iter().map(|c| c.abs()).collect());
            all_coefs.push(coefs);
            all_metrics.push(refit.accuracy);
        }
    }

    (all_beta_t, all_coefs, all_abs_coefs, all_metrics)
}
1055
/// Refit the functional logistic model on `n_boot` bootstrap resamples and
/// return the FPCA coefficient vector (intercept excluded) from each
/// successful refit.
///
/// Resamples containing only one class are skipped before fitting, and
/// refits that return an error are skipped silently, so the result may hold
/// fewer than `n_boot` vectors.
fn bootstrap_logistic_coefs(
    data: &FdMatrix,
    y: &[f64],
    scalar_covariates: Option<&FdMatrix>,
    n: usize,
    ncomp: usize,
    n_boot: usize,
    rng: &mut StdRng,
) -> Vec<Vec<f64>> {
    let mut boot_coefs = Vec::new();
    for _ in 0..n_boot {
        // Resample rows with replacement.
        let idx: Vec<usize> = (0..n).map(|_| rng.gen_range(0..n)).collect();
        let boot_data = subsample_rows(data, &idx);
        let boot_y: Vec<f64> = idx.iter().map(|&i| y[i]).collect();
        let boot_sc = scalar_covariates.map(|sc| subsample_rows(sc, &idx));
        // A logistic fit needs both classes present in the resample
        // (labels below/above 0.5 stand in for class 0/1).
        let has_both = boot_y.iter().any(|&v| v < 0.5) && boot_y.iter().any(|&v| v >= 0.5);
        if !has_both {
            continue;
        }
        if let Ok(refit) =
            functional_logistic(&boot_data, &boot_y, boot_sc.as_ref(), ncomp, 25, 1e-6)
        {
            boot_coefs.push((0..ncomp).map(|k| refit.coefficients[1 + k]).collect());
        }
    }
    boot_coefs
}