1use super::helpers::{
4 anchor_beam_search, beta_depth_from_bootstrap, build_stability_result,
5 calibration_gap_weighted, compute_score_depths, conformal_quantile_and_coverage,
6 predict_from_scores, project_scores, subsample_rows, validate_conformal_inputs,
7};
8use crate::error::FdarError;
9use crate::matrix::FdMatrix;
10use crate::scalar_on_function::{
11 fregre_lm, functional_logistic, sigmoid, FregreLmResult, FunctionalLogisticResult,
12};
13use rand::prelude::*;
14
/// Output of [`calibration_diagnostics`]: summary statistics describing how
/// well predicted probabilities match observed binary outcomes.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub struct CalibrationDiagnosticsResult {
    /// Mean squared difference between predicted probability and outcome.
    pub brier_score: f64,
    /// Mean negative log-likelihood, with probabilities clipped away from 0 and 1.
    pub log_loss: f64,
    /// Hosmer-Lemeshow chi-square statistic over equal-count probability groups.
    pub hosmer_lemeshow_chi2: f64,
    /// Degrees of freedom for the chi-square statistic (groups - 2, floored at 1).
    pub hosmer_lemeshow_df: usize,
    /// Number of non-empty groups actually formed.
    pub n_groups: usize,
    /// Per-group (mean predicted probability, mean observed outcome) pairs.
    pub reliability_bins: Vec<(f64, f64)>,
    /// Number of observations in each non-empty group.
    pub bin_counts: Vec<usize>,
}
38
39#[must_use = "expensive computation whose result should not be discarded"]
47pub fn calibration_diagnostics(
48 fit: &FunctionalLogisticResult,
49 y: &[f64],
50 n_groups: usize,
51) -> Result<CalibrationDiagnosticsResult, FdarError> {
52 let n = fit.probabilities.len();
53 if n == 0 {
54 return Err(FdarError::InvalidDimension {
55 parameter: "probabilities",
56 expected: ">0 length".into(),
57 actual: "0".into(),
58 });
59 }
60 if n != y.len() {
61 return Err(FdarError::InvalidDimension {
62 parameter: "y",
63 expected: format!("{n} (matching probabilities)"),
64 actual: format!("{}", y.len()),
65 });
66 }
67 if n_groups < 2 {
68 return Err(FdarError::InvalidParameter {
69 parameter: "n_groups",
70 message: "must be >= 2".into(),
71 });
72 }
73
74 let brier_score: f64 = fit
76 .probabilities
77 .iter()
78 .zip(y)
79 .map(|(&p, &yi)| (p - yi).powi(2))
80 .sum::<f64>()
81 / n as f64;
82
83 let log_loss: f64 = -fit
85 .probabilities
86 .iter()
87 .zip(y)
88 .map(|(&p, &yi)| {
89 let p_clip = p.clamp(1e-15, 1.0 - 1e-15);
90 yi * p_clip.ln() + (1.0 - yi) * (1.0 - p_clip).ln()
91 })
92 .sum::<f64>()
93 / n as f64;
94
95 let (hosmer_lemeshow_chi2, reliability_bins, bin_counts) =
96 hosmer_lemeshow_computation(&fit.probabilities, y, n, n_groups);
97
98 let actual_groups = bin_counts.len();
99 let hosmer_lemeshow_df = if actual_groups > 2 {
100 actual_groups - 2
101 } else {
102 1
103 };
104
105 Ok(CalibrationDiagnosticsResult {
106 brier_score,
107 log_loss,
108 hosmer_lemeshow_chi2,
109 hosmer_lemeshow_df,
110 n_groups: actual_groups,
111 reliability_bins,
112 bin_counts,
113 })
114}
115
/// Output of [`expected_calibration_error`].
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub struct EceResult {
    /// Expected calibration error: count-weighted mean |observed - predicted| gap
    /// over equal-width probability bins.
    pub ece: f64,
    /// Maximum calibration error: largest per-bin gap.
    pub mce: f64,
    /// Adaptive calibration error, computed over equal-count bins of the
    /// sorted probabilities.
    pub ace: f64,
    /// Number of bins requested by the caller.
    pub n_bins: usize,
    /// Per-bin weighted gap contribution to `ece` (0.0 for empty bins).
    pub bin_ece_contributions: Vec<f64>,
}
135
136#[must_use = "expensive computation whose result should not be discarded"]
149pub fn expected_calibration_error(
150 fit: &FunctionalLogisticResult,
151 y: &[f64],
152 n_bins: usize,
153) -> Result<EceResult, FdarError> {
154 let n = fit.probabilities.len();
155 if n == 0 {
156 return Err(FdarError::InvalidDimension {
157 parameter: "probabilities",
158 expected: ">0 length".into(),
159 actual: "0".into(),
160 });
161 }
162 if n != y.len() {
163 return Err(FdarError::InvalidDimension {
164 parameter: "y",
165 expected: format!("{n} (matching probabilities)"),
166 actual: format!("{}", y.len()),
167 });
168 }
169 if n_bins == 0 {
170 return Err(FdarError::InvalidParameter {
171 parameter: "n_bins",
172 message: "must be > 0".into(),
173 });
174 }
175
176 let (ece, mce, bin_ece_contributions) =
177 compute_equal_width_ece(&fit.probabilities, y, n, n_bins);
178
179 let mut sorted_idx: Vec<usize> = (0..n).collect();
181 sorted_idx.sort_by(|&a, &b| {
182 fit.probabilities[a]
183 .partial_cmp(&fit.probabilities[b])
184 .unwrap_or(std::cmp::Ordering::Equal)
185 });
186 let group_size = n / n_bins.max(1);
187 let mut ace = 0.0;
188 let mut start = 0;
189 for g in 0..n_bins {
190 if start >= n {
191 break;
192 }
193 let end = if g < n_bins - 1 {
194 (start + group_size).min(n)
195 } else {
196 n
197 };
198 ace += calibration_gap_weighted(&sorted_idx[start..end], y, &fit.probabilities, n);
199 start = end;
200 }
201
202 Ok(EceResult {
203 ece,
204 mce,
205 ace,
206 n_bins,
207 bin_ece_contributions,
208 })
209}
210
/// Output of [`conformal_prediction_residuals`]: split-conformal prediction
/// intervals for a test set.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub struct ConformalPredictionResult {
    /// Point predictions for each test observation.
    pub predictions: Vec<f64>,
    /// Lower interval bounds (`predictions - residual_quantile`).
    pub lower: Vec<f64>,
    /// Upper interval bounds (`predictions + residual_quantile`).
    pub upper: Vec<f64>,
    /// Conformal quantile of the absolute calibration residuals.
    pub residual_quantile: f64,
    /// Coverage estimate as reported by the conformal quantile computation.
    pub coverage: f64,
    /// Absolute residuals |y - prediction| on the held-out calibration split.
    pub calibration_scores: Vec<f64>,
}
232
/// Split-conformal prediction intervals for a functional linear regression.
///
/// Splits the training rows into a "proper" training part and a calibration
/// part (sizes determined by `validate_conformal_inputs` from `cal_fraction`),
/// refits the model on the proper part, scores absolute residuals on the
/// calibration part, and widens each test prediction symmetrically by the
/// conformal residual quantile at miscoverage level `alpha`.
///
/// `seed` drives the random split, so results are reproducible.
///
/// # Errors
///
/// Returns an error when the inputs fail `validate_conformal_inputs`, or when
/// the refit on the proper split fails.
#[must_use = "expensive computation whose result should not be discarded"]
pub fn conformal_prediction_residuals(
    fit: &FregreLmResult,
    train_data: &FdMatrix,
    train_y: &[f64],
    test_data: &FdMatrix,
    scalar_covariates_train: Option<&FdMatrix>,
    scalar_covariates_test: Option<&FdMatrix>,
    cal_fraction: f64,
    alpha: f64,
    seed: u64,
) -> Result<ConformalPredictionResult, FdarError> {
    let (n, m) = train_data.shape();
    let (n_test, m_test) = test_data.shape();
    let ncomp = fit.ncomp;
    // Helper validates shapes/parameters and returns the split sizes.
    let (_n_cal, n_proper) = validate_conformal_inputs(
        n,
        m,
        n_test,
        m_test,
        train_y.len(),
        ncomp,
        cal_fraction,
        alpha,
    )
    .ok_or_else(|| FdarError::InvalidParameter {
        parameter: "conformal_prediction_residuals",
        message: "invalid input dimensions or parameters".into(),
    })?;

    // Seeded shuffle: first n_proper indices train, the rest calibrate.
    let mut rng = StdRng::seed_from_u64(seed);
    let mut all_idx: Vec<usize> = (0..n).collect();
    all_idx.shuffle(&mut rng);
    let proper_idx = &all_idx[..n_proper];
    let cal_idx = &all_idx[n_proper..];

    // Refit on the proper training split only.
    let proper_data = subsample_rows(train_data, proper_idx);
    let proper_y: Vec<f64> = proper_idx.iter().map(|&i| train_y[i]).collect();
    let proper_sc = scalar_covariates_train.map(|sc| subsample_rows(sc, proper_idx));

    let refit = fregre_lm(&proper_data, &proper_y, proper_sc.as_ref(), ncomp)?;

    // Predict the calibration split with the refit model.
    let cal_data = subsample_rows(train_data, cal_idx);
    let cal_sc = scalar_covariates_train.map(|sc| subsample_rows(sc, cal_idx));
    let cal_scores = project_scores(
        &cal_data,
        &refit.fpca.mean,
        &refit.fpca.rotation,
        ncomp,
        &refit.fpca.weights,
    );
    let cal_preds = predict_from_scores(
        &cal_scores,
        &refit.coefficients,
        &refit.gamma,
        cal_sc.as_ref(),
        ncomp,
    );
    let cal_n = cal_idx.len();

    // Nonconformity score: absolute residual on the calibration split.
    let calibration_scores: Vec<f64> = cal_idx
        .iter()
        .enumerate()
        .map(|(i, &orig)| (train_y[orig] - cal_preds[i]).abs())
        .collect();

    let (residual_quantile, coverage) =
        conformal_quantile_and_coverage(&calibration_scores, cal_n, alpha);

    // Point predictions on the test set from the same refit model.
    let test_scores = project_scores(
        test_data,
        &refit.fpca.mean,
        &refit.fpca.rotation,
        ncomp,
        &refit.fpca.weights,
    );
    let predictions = predict_from_scores(
        &test_scores,
        &refit.coefficients,
        &refit.gamma,
        scalar_covariates_test,
        ncomp,
    );

    // Symmetric intervals: prediction +/- conformal quantile.
    let lower: Vec<f64> = predictions.iter().map(|&p| p - residual_quantile).collect();
    let upper: Vec<f64> = predictions.iter().map(|&p| p + residual_quantile).collect();

    Ok(ConformalPredictionResult {
        predictions,
        lower,
        upper,
        residual_quantile,
        coverage,
        calibration_scores,
    })
}
357
/// Depth notion used when ranking FPC score vectors and bootstrap coefficients.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum DepthType {
    /// Fraiman-Muniz depth.
    FraimanMuniz,
    /// Modified band depth.
    ModifiedBand,
    /// Functional spatial depth.
    FunctionalSpatial,
}
370
/// Output of [`regression_depth`] / [`regression_depth_logistic`].
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub struct RegressionDepthResult {
    /// Depth of the fitted coefficient vector within the bootstrap coefficient cloud.
    pub beta_depth: f64,
    /// Depth of each observation's FPC score vector.
    pub score_depths: Vec<f64>,
    /// Mean of `score_depths`.
    pub mean_score_depth: f64,
    /// Depth notion used for both computations.
    pub depth_type: DepthType,
    /// Number of bootstrap refits that succeeded (out of `n_boot` requested).
    pub n_boot_success: usize,
}
386
387#[must_use = "expensive computation whose result should not be discarded"]
409pub fn regression_depth(
410 fit: &FregreLmResult,
411 data: &FdMatrix,
412 y: &[f64],
413 scalar_covariates: Option<&FdMatrix>,
414 n_boot: usize,
415 depth_type: DepthType,
416 seed: u64,
417) -> Result<RegressionDepthResult, FdarError> {
418 let (n, m) = data.shape();
419 if n < 4 {
420 return Err(FdarError::InvalidDimension {
421 parameter: "data",
422 expected: ">=4 rows".into(),
423 actual: format!("{n}"),
424 });
425 }
426 if m == 0 {
427 return Err(FdarError::InvalidDimension {
428 parameter: "data",
429 expected: ">0 columns".into(),
430 actual: "0".into(),
431 });
432 }
433 if n != y.len() {
434 return Err(FdarError::InvalidDimension {
435 parameter: "y",
436 expected: format!("{n} (matching data rows)"),
437 actual: format!("{}", y.len()),
438 });
439 }
440 if n_boot == 0 {
441 return Err(FdarError::InvalidParameter {
442 parameter: "n_boot",
443 message: "must be > 0".into(),
444 });
445 }
446 let ncomp = fit.ncomp;
447 let scores = project_scores(
448 data,
449 &fit.fpca.mean,
450 &fit.fpca.rotation,
451 ncomp,
452 &fit.fpca.weights,
453 );
454 let score_depths = compute_score_depths(&scores, depth_type);
455 if score_depths.is_empty() {
456 return Err(FdarError::ComputationFailed {
457 operation: "regression_depth",
458 detail: "score depth computation returned empty; try increasing ncomp or check that the score matrix has sufficient variability".into(),
459 });
460 }
461 let mean_score_depth = score_depths.iter().sum::<f64>() / score_depths.len() as f64;
462
463 let orig_coefs: Vec<f64> = (0..ncomp).map(|k| fit.coefficients[1 + k]).collect();
464 let mut rng = StdRng::seed_from_u64(seed);
465 let mut boot_coefs = Vec::new();
466 for _ in 0..n_boot {
467 let idx: Vec<usize> = (0..n).map(|_| rng.gen_range(0..n)).collect();
468 let boot_data = subsample_rows(data, &idx);
469 let boot_y: Vec<f64> = idx.iter().map(|&i| y[i]).collect();
470 let boot_sc = scalar_covariates.map(|sc| subsample_rows(sc, &idx));
471 if let Ok(refit) = fregre_lm(&boot_data, &boot_y, boot_sc.as_ref(), ncomp) {
472 boot_coefs.push((0..ncomp).map(|k| refit.coefficients[1 + k]).collect());
473 }
474 }
475
476 let beta_depth = beta_depth_from_bootstrap(&boot_coefs, &orig_coefs, ncomp, depth_type);
477
478 Ok(RegressionDepthResult {
479 beta_depth,
480 score_depths,
481 mean_score_depth,
482 depth_type,
483 n_boot_success: boot_coefs.len(),
484 })
485}
486
487#[must_use = "expensive computation whose result should not be discarded"]
497pub fn regression_depth_logistic(
498 fit: &FunctionalLogisticResult,
499 data: &FdMatrix,
500 y: &[f64],
501 scalar_covariates: Option<&FdMatrix>,
502 n_boot: usize,
503 depth_type: DepthType,
504 seed: u64,
505) -> Result<RegressionDepthResult, FdarError> {
506 let (n, m) = data.shape();
507 if n < 4 {
508 return Err(FdarError::InvalidDimension {
509 parameter: "data",
510 expected: ">=4 rows".into(),
511 actual: format!("{n}"),
512 });
513 }
514 if m == 0 {
515 return Err(FdarError::InvalidDimension {
516 parameter: "data",
517 expected: ">0 columns".into(),
518 actual: "0".into(),
519 });
520 }
521 if n != y.len() {
522 return Err(FdarError::InvalidDimension {
523 parameter: "y",
524 expected: format!("{n} (matching data rows)"),
525 actual: format!("{}", y.len()),
526 });
527 }
528 if n_boot == 0 {
529 return Err(FdarError::InvalidParameter {
530 parameter: "n_boot",
531 message: "must be > 0".into(),
532 });
533 }
534 let ncomp = fit.ncomp;
535 let scores = project_scores(
536 data,
537 &fit.fpca.mean,
538 &fit.fpca.rotation,
539 ncomp,
540 &fit.fpca.weights,
541 );
542 let score_depths = compute_score_depths(&scores, depth_type);
543 if score_depths.is_empty() {
544 return Err(FdarError::ComputationFailed {
545 operation: "regression_depth_logistic",
546 detail: "score depth computation returned empty; try increasing ncomp or check that the score matrix has sufficient variability".into(),
547 });
548 }
549 let mean_score_depth = score_depths.iter().sum::<f64>() / score_depths.len() as f64;
550
551 let orig_coefs: Vec<f64> = (0..ncomp).map(|k| fit.coefficients[1 + k]).collect();
552 let mut rng = StdRng::seed_from_u64(seed);
553 let boot_coefs =
554 bootstrap_logistic_coefs(data, y, scalar_covariates, n, ncomp, n_boot, &mut rng);
555
556 let beta_depth = beta_depth_from_bootstrap(&boot_coefs, &orig_coefs, ncomp, depth_type);
557
558 Ok(RegressionDepthResult {
559 beta_depth,
560 score_depths,
561 mean_score_depth,
562 depth_type,
563 n_boot_success: boot_coefs.len(),
564 })
565}
566
/// Output of [`explanation_stability`] / [`explanation_stability_logistic`]:
/// bootstrap variability of the fitted explanation quantities, as summarized
/// by `build_stability_result`.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub struct StabilityAnalysisResult {
    /// Spread of the bootstrap beta(t) curves at each grid point, as reported
    /// by `build_stability_result`.
    pub beta_t_std: Vec<f64>,
    /// Spread of each FPC slope coefficient across bootstrap refits.
    pub coefficient_std: Vec<f64>,
    /// Spread of the fit metric (R-squared for the linear variant, accuracy
    /// for the logistic variant) across bootstrap refits.
    pub metric_std: f64,
    /// Coefficient of variation of beta(t) across bootstrap refits.
    pub beta_t_cv: Vec<f64>,
    /// Stability of the coefficient-importance ranking across refits —
    /// exact definition lives in `build_stability_result`.
    pub importance_stability: f64,
    /// Number of bootstrap refits that succeeded.
    pub n_boot_success: usize,
}
588
589#[must_use = "expensive computation whose result should not be discarded"]
602pub fn explanation_stability(
603 data: &FdMatrix,
604 y: &[f64],
605 scalar_covariates: Option<&FdMatrix>,
606 ncomp: usize,
607 n_boot: usize,
608 seed: u64,
609) -> Result<StabilityAnalysisResult, FdarError> {
610 let (n, m) = data.shape();
611 if n < 4 {
612 return Err(FdarError::InvalidDimension {
613 parameter: "data",
614 expected: ">=4 rows".into(),
615 actual: format!("{n}"),
616 });
617 }
618 if m == 0 {
619 return Err(FdarError::InvalidDimension {
620 parameter: "data",
621 expected: ">0 columns".into(),
622 actual: "0".into(),
623 });
624 }
625 if n != y.len() {
626 return Err(FdarError::InvalidDimension {
627 parameter: "y",
628 expected: format!("{n} (matching data rows)"),
629 actual: format!("{}", y.len()),
630 });
631 }
632 if n_boot < 2 {
633 return Err(FdarError::InvalidParameter {
634 parameter: "n_boot",
635 message: "must be >= 2".into(),
636 });
637 }
638 if ncomp == 0 {
639 return Err(FdarError::InvalidParameter {
640 parameter: "ncomp",
641 message: "must be > 0".into(),
642 });
643 }
644
645 let mut rng = StdRng::seed_from_u64(seed);
646 let mut all_beta_t: Vec<Vec<f64>> = Vec::new();
647 let mut all_coefs: Vec<Vec<f64>> = Vec::new();
648 let mut all_metrics: Vec<f64> = Vec::new();
649 let mut all_abs_coefs: Vec<Vec<f64>> = Vec::new();
650
651 for _ in 0..n_boot {
652 let idx: Vec<usize> = (0..n).map(|_| rng.gen_range(0..n)).collect();
653 let boot_data = subsample_rows(data, &idx);
654 let boot_y: Vec<f64> = idx.iter().map(|&i| y[i]).collect();
655 let boot_sc = scalar_covariates.map(|sc| subsample_rows(sc, &idx));
656 if let Ok(refit) = fregre_lm(&boot_data, &boot_y, boot_sc.as_ref(), ncomp) {
657 all_beta_t.push(refit.beta_t.clone());
658 let coefs: Vec<f64> = (0..ncomp).map(|k| refit.coefficients[1 + k]).collect();
659 all_abs_coefs.push(coefs.iter().map(|c| c.abs()).collect());
660 all_coefs.push(coefs);
661 all_metrics.push(refit.r_squared);
662 }
663 }
664
665 build_stability_result(
666 &all_beta_t,
667 &all_coefs,
668 &all_abs_coefs,
669 &all_metrics,
670 m,
671 ncomp,
672 )
673 .ok_or_else(|| FdarError::ComputationFailed {
674 operation: "explanation_stability",
675 detail: "insufficient successful bootstrap refits; try increasing n_boot or check that the model fits reliably on subsampled data".into(),
676 })
677}
678
679#[must_use = "expensive computation whose result should not be discarded"]
689pub fn explanation_stability_logistic(
690 data: &FdMatrix,
691 y: &[f64],
692 scalar_covariates: Option<&FdMatrix>,
693 ncomp: usize,
694 n_boot: usize,
695 seed: u64,
696) -> Result<StabilityAnalysisResult, FdarError> {
697 let (n, m) = data.shape();
698 if n < 4 {
699 return Err(FdarError::InvalidDimension {
700 parameter: "data",
701 expected: ">=4 rows".into(),
702 actual: format!("{n}"),
703 });
704 }
705 if m == 0 {
706 return Err(FdarError::InvalidDimension {
707 parameter: "data",
708 expected: ">0 columns".into(),
709 actual: "0".into(),
710 });
711 }
712 if n != y.len() {
713 return Err(FdarError::InvalidDimension {
714 parameter: "y",
715 expected: format!("{n} (matching data rows)"),
716 actual: format!("{}", y.len()),
717 });
718 }
719 if n_boot < 2 {
720 return Err(FdarError::InvalidParameter {
721 parameter: "n_boot",
722 message: "must be >= 2".into(),
723 });
724 }
725 if ncomp == 0 {
726 return Err(FdarError::InvalidParameter {
727 parameter: "ncomp",
728 message: "must be > 0".into(),
729 });
730 }
731
732 let mut rng = StdRng::seed_from_u64(seed);
733 let (all_beta_t, all_coefs, all_abs_coefs, all_metrics) =
734 bootstrap_logistic_stability(data, y, scalar_covariates, n, ncomp, n_boot, &mut rng);
735
736 build_stability_result(
737 &all_beta_t,
738 &all_coefs,
739 &all_abs_coefs,
740 &all_metrics,
741 m,
742 ncomp,
743 )
744 .ok_or_else(|| FdarError::ComputationFailed {
745 operation: "explanation_stability_logistic",
746 detail: "insufficient successful bootstrap refits; try increasing n_boot or check that the model fits reliably on subsampled data".into(),
747 })
748}
749
/// Single interval condition on one FPC score component of an anchor rule.
#[derive(Debug, Clone, PartialEq)]
pub struct AnchorCondition {
    /// Index of the FPC score component the bounds apply to.
    pub component: usize,
    /// Lower bound of the score interval.
    pub lower_bound: f64,
    /// Upper bound of the score interval.
    pub upper_bound: f64,
}
764
/// Anchor rule: a conjunction of score-interval conditions together with the
/// quality metrics estimated by `anchor_beam_search`.
#[derive(Debug, Clone, PartialEq)]
pub struct AnchorRule {
    /// Conditions that must all hold for an observation to match the rule.
    pub conditions: Vec<AnchorCondition>,
    /// Rule precision as estimated by the beam search (share of matching
    /// observations whose prediction agrees with the explained one).
    pub precision: f64,
    /// Rule coverage as estimated by the beam search (share of observations
    /// matched by the rule).
    pub coverage: f64,
    /// Number of observations matching the rule.
    pub n_matching: usize,
}
777
/// Output of [`anchor_explanation`] / [`anchor_explanation_logistic`].
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub struct AnchorResult {
    /// Best rule found by the beam search.
    pub rule: AnchorRule,
    /// Row index of the explained observation.
    pub observation: usize,
    /// Model prediction for the observation: the fitted value for the linear
    /// variant, the predicted probability for the logistic variant.
    pub predicted_value: f64,
}
789
790#[must_use = "expensive computation whose result should not be discarded"]
809pub fn anchor_explanation(
810 fit: &FregreLmResult,
811 data: &FdMatrix,
812 scalar_covariates: Option<&FdMatrix>,
813 observation: usize,
814 precision_threshold: f64,
815 n_bins: usize,
816) -> Result<AnchorResult, FdarError> {
817 let (n, m) = data.shape();
818 if n == 0 {
819 return Err(FdarError::InvalidDimension {
820 parameter: "data",
821 expected: ">0 rows".into(),
822 actual: "0".into(),
823 });
824 }
825 if m != fit.fpca.mean.len() {
826 return Err(FdarError::InvalidDimension {
827 parameter: "data",
828 expected: format!("{} columns", fit.fpca.mean.len()),
829 actual: format!("{m}"),
830 });
831 }
832 if observation >= n {
833 return Err(FdarError::InvalidParameter {
834 parameter: "observation",
835 message: format!("observation {observation} >= n {n}"),
836 });
837 }
838 if n_bins < 2 {
839 return Err(FdarError::InvalidParameter {
840 parameter: "n_bins",
841 message: "must be >= 2".into(),
842 });
843 }
844 let ncomp = fit.ncomp;
845 let scores = project_scores(
846 data,
847 &fit.fpca.mean,
848 &fit.fpca.rotation,
849 ncomp,
850 &fit.fpca.weights,
851 );
852 let obs_pred = fit.fitted_values[observation];
853 let tol = fit.residual_se;
854
855 let same_pred = |i: usize| -> bool {
857 let mut yhat = fit.coefficients[0];
858 for k in 0..ncomp {
859 yhat += fit.coefficients[1 + k] * scores[(i, k)];
860 }
861 if let Some(sc) = scalar_covariates {
862 for j in 0..fit.gamma.len() {
863 yhat += fit.gamma[j] * sc[(i, j)];
864 }
865 }
866 (yhat - obs_pred).abs() <= tol
867 };
868
869 let (rule, _) = anchor_beam_search(
870 &scores,
871 ncomp,
872 n,
873 observation,
874 precision_threshold,
875 n_bins,
876 &same_pred,
877 );
878
879 Ok(AnchorResult {
880 rule,
881 observation,
882 predicted_value: obs_pred,
883 })
884}
885
886#[must_use = "expensive computation whose result should not be discarded"]
896pub fn anchor_explanation_logistic(
897 fit: &FunctionalLogisticResult,
898 data: &FdMatrix,
899 scalar_covariates: Option<&FdMatrix>,
900 observation: usize,
901 precision_threshold: f64,
902 n_bins: usize,
903) -> Result<AnchorResult, FdarError> {
904 let (n, m) = data.shape();
905 if n == 0 {
906 return Err(FdarError::InvalidDimension {
907 parameter: "data",
908 expected: ">0 rows".into(),
909 actual: "0".into(),
910 });
911 }
912 if m != fit.fpca.mean.len() {
913 return Err(FdarError::InvalidDimension {
914 parameter: "data",
915 expected: format!("{} columns", fit.fpca.mean.len()),
916 actual: format!("{m}"),
917 });
918 }
919 if observation >= n {
920 return Err(FdarError::InvalidParameter {
921 parameter: "observation",
922 message: format!("observation {observation} >= n {n}"),
923 });
924 }
925 if n_bins < 2 {
926 return Err(FdarError::InvalidParameter {
927 parameter: "n_bins",
928 message: "must be >= 2".into(),
929 });
930 }
931 let ncomp = fit.ncomp;
932 let scores = project_scores(
933 data,
934 &fit.fpca.mean,
935 &fit.fpca.rotation,
936 ncomp,
937 &fit.fpca.weights,
938 );
939 let obs_class = fit.predicted_classes[observation];
940 let obs_prob = fit.probabilities[observation];
941 let p_scalar = fit.gamma.len();
942
943 let same_pred = |i: usize| -> bool {
945 let mut eta = fit.intercept;
946 for k in 0..ncomp {
947 eta += fit.coefficients[1 + k] * scores[(i, k)];
948 }
949 if let Some(sc) = scalar_covariates {
950 for j in 0..p_scalar {
951 eta += fit.gamma[j] * sc[(i, j)];
952 }
953 }
954 let pred_class = usize::from(sigmoid(eta) >= 0.5);
955 pred_class == obs_class
956 };
957
958 let (rule, _) = anchor_beam_search(
959 &scores,
960 ncomp,
961 n,
962 observation,
963 precision_threshold,
964 n_bins,
965 &same_pred,
966 );
967
968 Ok(AnchorResult {
969 rule,
970 observation,
971 predicted_value: obs_prob,
972 })
973}
974
/// Hosmer-Lemeshow style grouping: sorts observations by predicted
/// probability, splits them into `n_groups` equal-count groups (remainder
/// spread over the first groups), and accumulates the chi-square statistic.
///
/// Returns `(chi2, reliability_bins, bin_counts)`, where each reliability bin
/// is a `(mean predicted, mean observed)` pair; empty groups are skipped and
/// near-degenerate denominators contribute nothing to the statistic.
fn hosmer_lemeshow_computation(
    probabilities: &[f64],
    y: &[f64],
    n: usize,
    n_groups: usize,
) -> (f64, Vec<(f64, f64)>, Vec<usize>) {
    // Rank observations by predicted probability; ties keep index order
    // (stable sort), NaNs compare equal so the ordering stays total.
    let mut order: Vec<usize> = (0..n).collect();
    order.sort_by(|&a, &b| {
        probabilities[a]
            .partial_cmp(&probabilities[b])
            .unwrap_or(std::cmp::Ordering::Equal)
    });

    let base = n / n_groups;
    let extra = n % n_groups;

    let mut chi2 = 0.0;
    let mut bins = Vec::with_capacity(n_groups);
    let mut counts = Vec::with_capacity(n_groups);

    let mut lo = 0;
    for g in 0..n_groups {
        // First `extra` groups take one additional observation.
        let sz = base + usize::from(g < extra);
        let group = &order[lo..lo + sz];
        lo += sz;

        if group.is_empty() {
            continue;
        }
        let observed: f64 = group.iter().map(|&i| y[i]).sum();
        let expected: f64 = group.iter().map(|&i| probabilities[i]).sum();
        let ng = group.len() as f64;
        let mean_pred = expected / ng;
        bins.push((mean_pred, observed / ng));
        counts.push(group.len());

        // Guard against division by ~0 when mean_pred is (near) 0 or 1.
        let denom = ng * mean_pred * (1.0 - mean_pred);
        if denom > 1e-15 {
            chi2 += (observed - expected).powi(2) / denom;
        }
    }

    (chi2, bins, counts)
}
1026
/// Expected and maximum calibration error over `n_bins` equal-width
/// probability bins.
///
/// Returns `(ece, mce, per_bin_contributions)`: `ece` is the count-weighted
/// sum of per-bin |mean observed - mean predicted| gaps, `mce` the largest
/// such gap, and the contributions vector has one (possibly 0.0) entry per bin.
fn compute_equal_width_ece(
    probabilities: &[f64],
    y: &[f64],
    n: usize,
    n_bins: usize,
) -> (f64, f64, Vec<f64>) {
    let mut sum_y = vec![0.0; n_bins];
    let mut sum_p = vec![0.0; n_bins];
    let mut counts = vec![0usize; n_bins];

    for i in 0..n {
        let p = probabilities[i];
        // p == 1.0 would land in bin n_bins; clamp it into the last bin.
        let b = ((p * n_bins as f64).floor() as usize).min(n_bins - 1);
        sum_y[b] += y[i];
        sum_p[b] += p;
        counts[b] += 1;
    }

    let mut contributions = vec![0.0; n_bins];
    let mut ece = 0.0;
    let mut mce = 0.0_f64;
    for (b, &cnt) in counts.iter().enumerate() {
        if cnt == 0 {
            continue;
        }
        let cnt_f = cnt as f64;
        let gap = (sum_y[b] / cnt_f - sum_p[b] / cnt_f).abs();
        let weighted = cnt_f / n as f64 * gap;
        contributions[b] = weighted;
        ece += weighted;
        mce = mce.max(gap);
    }

    (ece, mce, contributions)
}
1064
1065fn bootstrap_logistic_stability(
1067 data: &FdMatrix,
1068 y: &[f64],
1069 scalar_covariates: Option<&FdMatrix>,
1070 n: usize,
1071 ncomp: usize,
1072 n_boot: usize,
1073 rng: &mut StdRng,
1074) -> (Vec<Vec<f64>>, Vec<Vec<f64>>, Vec<Vec<f64>>, Vec<f64>) {
1075 let mut all_beta_t: Vec<Vec<f64>> = Vec::new();
1076 let mut all_coefs: Vec<Vec<f64>> = Vec::new();
1077 let mut all_abs_coefs: Vec<Vec<f64>> = Vec::new();
1078 let mut all_metrics: Vec<f64> = Vec::new();
1079
1080 for _ in 0..n_boot {
1081 let idx: Vec<usize> = (0..n).map(|_| rng.gen_range(0..n)).collect();
1082 let boot_data = subsample_rows(data, &idx);
1083 let boot_y: Vec<f64> = idx.iter().map(|&i| y[i]).collect();
1084 let boot_sc = scalar_covariates.map(|sc| subsample_rows(sc, &idx));
1085 let has_both = boot_y.iter().any(|&v| v < 0.5) && boot_y.iter().any(|&v| v >= 0.5);
1086 if !has_both {
1087 continue;
1088 }
1089 if let Ok(refit) =
1090 functional_logistic(&boot_data, &boot_y, boot_sc.as_ref(), ncomp, 25, 1e-6)
1091 {
1092 all_beta_t.push(refit.beta_t.clone());
1093 let coefs: Vec<f64> = (0..ncomp).map(|k| refit.coefficients[1 + k]).collect();
1094 all_abs_coefs.push(coefs.iter().map(|c| c.abs()).collect());
1095 all_coefs.push(coefs);
1096 all_metrics.push(refit.accuracy);
1097 }
1098 }
1099
1100 (all_beta_t, all_coefs, all_abs_coefs, all_metrics)
1101}
1102
1103fn bootstrap_logistic_coefs(
1105 data: &FdMatrix,
1106 y: &[f64],
1107 scalar_covariates: Option<&FdMatrix>,
1108 n: usize,
1109 ncomp: usize,
1110 n_boot: usize,
1111 rng: &mut StdRng,
1112) -> Vec<Vec<f64>> {
1113 let mut boot_coefs = Vec::new();
1114 for _ in 0..n_boot {
1115 let idx: Vec<usize> = (0..n).map(|_| rng.gen_range(0..n)).collect();
1116 let boot_data = subsample_rows(data, &idx);
1117 let boot_y: Vec<f64> = idx.iter().map(|&i| y[i]).collect();
1118 let boot_sc = scalar_covariates.map(|sc| subsample_rows(sc, &idx));
1119 let has_both = boot_y.iter().any(|&v| v < 0.5) && boot_y.iter().any(|&v| v >= 0.5);
1120 if !has_both {
1121 continue;
1122 }
1123 if let Ok(refit) =
1124 functional_logistic(&boot_data, &boot_y, boot_sc.as_ref(), ncomp, 25, 1e-6)
1125 {
1126 boot_coefs.push((0..ncomp).map(|k| refit.coefficients[1 + k]).collect());
1127 }
1128 }
1129 boot_coefs
1130}