Skip to main content

fdars_core/explain/
advanced.rs

1//! Calibration, conformal prediction, regression depth, stability, and anchors.
2
3use super::helpers::{
4    anchor_beam_search, beta_depth_from_bootstrap, build_stability_result,
5    calibration_gap_weighted, compute_score_depths, conformal_quantile_and_coverage,
6    predict_from_scores, project_scores, subsample_rows, validate_conformal_inputs,
7};
8use crate::error::FdarError;
9use crate::matrix::FdMatrix;
10use crate::scalar_on_function::{
11    fregre_lm, functional_logistic, sigmoid, FregreLmResult, FunctionalLogisticResult,
12};
13use rand::prelude::*;
14
15// ===========================================================================
16// Calibration Diagnostics (logistic only)
17// ===========================================================================
18
/// Calibration diagnostics for a functional logistic regression model.
///
/// Produced by [`calibration_diagnostics`]; summarizes how well the fitted
/// probabilities agree with the observed binary outcomes.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub struct CalibrationDiagnosticsResult {
    /// Brier score: (1/n) Σ (p_i - y_i)².
    pub brier_score: f64,
    /// Log loss: -(1/n) Σ [y log p + (1-y) log(1-p)], with p clamped away from 0 and 1.
    pub log_loss: f64,
    /// Hosmer-Lemeshow chi² statistic.
    pub hosmer_lemeshow_chi2: f64,
    /// Degrees of freedom: n_groups - 2 (floored at 1 when few groups form).
    pub hosmer_lemeshow_df: usize,
    /// Number of calibration groups actually formed.
    pub n_groups: usize,
    /// Reliability bins: (mean_predicted, mean_observed) per group.
    pub reliability_bins: Vec<(f64, f64)>,
    /// Number of observations in each group.
    pub bin_counts: Vec<usize>,
}
38
39/// Calibration diagnostics for a functional logistic regression model.
40///
41/// # Errors
42///
43/// Returns [`FdarError::InvalidDimension`] if `fit.probabilities` is empty or
44/// `y.len()` does not match the number of probabilities.
45/// Returns [`FdarError::InvalidParameter`] if `n_groups < 2`.
46#[must_use = "expensive computation whose result should not be discarded"]
47pub fn calibration_diagnostics(
48    fit: &FunctionalLogisticResult,
49    y: &[f64],
50    n_groups: usize,
51) -> Result<CalibrationDiagnosticsResult, FdarError> {
52    let n = fit.probabilities.len();
53    if n == 0 {
54        return Err(FdarError::InvalidDimension {
55            parameter: "probabilities",
56            expected: ">0 length".into(),
57            actual: "0".into(),
58        });
59    }
60    if n != y.len() {
61        return Err(FdarError::InvalidDimension {
62            parameter: "y",
63            expected: format!("{n} (matching probabilities)"),
64            actual: format!("{}", y.len()),
65        });
66    }
67    if n_groups < 2 {
68        return Err(FdarError::InvalidParameter {
69            parameter: "n_groups",
70            message: "must be >= 2".into(),
71        });
72    }
73
74    // Brier score
75    let brier_score: f64 = fit
76        .probabilities
77        .iter()
78        .zip(y)
79        .map(|(&p, &yi)| (p - yi).powi(2))
80        .sum::<f64>()
81        / n as f64;
82
83    // Log loss
84    let log_loss: f64 = -fit
85        .probabilities
86        .iter()
87        .zip(y)
88        .map(|(&p, &yi)| {
89            let p_clip = p.clamp(1e-15, 1.0 - 1e-15);
90            yi * p_clip.ln() + (1.0 - yi) * (1.0 - p_clip).ln()
91        })
92        .sum::<f64>()
93        / n as f64;
94
95    let (hosmer_lemeshow_chi2, reliability_bins, bin_counts) =
96        hosmer_lemeshow_computation(&fit.probabilities, y, n, n_groups);
97
98    let actual_groups = bin_counts.len();
99    let hosmer_lemeshow_df = if actual_groups > 2 {
100        actual_groups - 2
101    } else {
102        1
103    };
104
105    Ok(CalibrationDiagnosticsResult {
106        brier_score,
107        log_loss,
108        hosmer_lemeshow_chi2,
109        hosmer_lemeshow_df,
110        n_groups: actual_groups,
111        reliability_bins,
112        bin_counts,
113    })
114}
115
116// ===========================================================================
117// Expected Calibration Error (ECE)
118// ===========================================================================
119
/// Result of expected calibration error analysis.
///
/// Produced by [`expected_calibration_error`].
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub struct EceResult {
    /// Expected calibration error: Σ (n_b/n) |acc_b - conf_b| over equal-width bins.
    pub ece: f64,
    /// Maximum calibration error: max |acc_b - conf_b|.
    pub mce: f64,
    /// Adaptive calibration error (equal-mass / quantile bins).
    pub ace: f64,
    /// Number of bins used.
    pub n_bins: usize,
    /// Per-bin ECE contributions (length n_bins).
    pub bin_ece_contributions: Vec<f64>,
}
135
136/// Compute expected, maximum, and adaptive calibration errors for a logistic model.
137///
138/// # Arguments
139/// * `fit` — A fitted [`FunctionalLogisticResult`]
140/// * `y` — Binary labels (0/1), length n
141/// * `n_bins` — Number of bins for equal-width binning
142///
143/// # Errors
144///
145/// Returns [`FdarError::InvalidDimension`] if `fit.probabilities` is empty or
146/// `y.len()` does not match the number of probabilities.
147/// Returns [`FdarError::InvalidParameter`] if `n_bins` is zero.
148#[must_use = "expensive computation whose result should not be discarded"]
149pub fn expected_calibration_error(
150    fit: &FunctionalLogisticResult,
151    y: &[f64],
152    n_bins: usize,
153) -> Result<EceResult, FdarError> {
154    let n = fit.probabilities.len();
155    if n == 0 {
156        return Err(FdarError::InvalidDimension {
157            parameter: "probabilities",
158            expected: ">0 length".into(),
159            actual: "0".into(),
160        });
161    }
162    if n != y.len() {
163        return Err(FdarError::InvalidDimension {
164            parameter: "y",
165            expected: format!("{n} (matching probabilities)"),
166            actual: format!("{}", y.len()),
167        });
168    }
169    if n_bins == 0 {
170        return Err(FdarError::InvalidParameter {
171            parameter: "n_bins",
172            message: "must be > 0".into(),
173        });
174    }
175
176    let (ece, mce, bin_ece_contributions) =
177        compute_equal_width_ece(&fit.probabilities, y, n, n_bins);
178
179    // ACE: equal-mass (quantile) bins
180    let mut sorted_idx: Vec<usize> = (0..n).collect();
181    sorted_idx.sort_by(|&a, &b| {
182        fit.probabilities[a]
183            .partial_cmp(&fit.probabilities[b])
184            .unwrap_or(std::cmp::Ordering::Equal)
185    });
186    let group_size = n / n_bins.max(1);
187    let mut ace = 0.0;
188    let mut start = 0;
189    for g in 0..n_bins {
190        if start >= n {
191            break;
192        }
193        let end = if g < n_bins - 1 {
194            (start + group_size).min(n)
195        } else {
196            n
197        };
198        ace += calibration_gap_weighted(&sorted_idx[start..end], y, &fit.probabilities, n);
199        start = end;
200    }
201
202    Ok(EceResult {
203        ece,
204        mce,
205        ace,
206        n_bins,
207        bin_ece_contributions,
208    })
209}
210
211// ===========================================================================
212// Conformal Prediction Residuals
213// ===========================================================================
214
/// Result of split-conformal prediction.
///
/// Produced by [`conformal_prediction_residuals`].
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub struct ConformalPredictionResult {
    /// Predictions on test data (length n_test).
    pub predictions: Vec<f64>,
    /// Lower bounds of prediction intervals (length n_test).
    pub lower: Vec<f64>,
    /// Upper bounds of prediction intervals (length n_test).
    pub upper: Vec<f64>,
    /// Quantile of calibration residuals (half-width of every interval).
    pub residual_quantile: f64,
    /// Empirical coverage on the calibration set.
    pub coverage: f64,
    /// Absolute residuals on the calibration set.
    pub calibration_scores: Vec<f64>,
}
232
233/// Split-conformal prediction intervals for a linear functional regression.
234///
235/// Randomly splits training data into proper-train and calibration subsets,
236/// refits the model, and constructs distribution-free prediction intervals.
237///
238/// # Arguments
239/// * `fit` — Original [`FregreLmResult`] (used for ncomp)
240/// * `train_data` — Training functional data (n × m)
241/// * `train_y` — Training response (length n)
242/// * `test_data` — Test functional data (n_test × m)
243/// * `scalar_covariates_train` — Optional scalar covariates for training
244/// * `scalar_covariates_test` — Optional scalar covariates for test
245/// * `cal_fraction` — Fraction of training data for calibration (0, 1)
246/// * `alpha` — Miscoverage level (e.g. 0.1 for 90% intervals)
247/// * `seed` — Random seed
248///
249/// # Errors
250///
251/// Returns [`FdarError::InvalidParameter`] if input dimensions or parameters
252/// are invalid (e.g., mismatched columns between train and test, `cal_fraction`
253/// outside (0, 1), `alpha` outside (0, 1), or too few training observations).
254/// May also propagate errors from [`crate::scalar_on_function::fregre_lm`]
255/// when refitting on the proper-training subset.
256#[must_use = "expensive computation whose result should not be discarded"]
257pub fn conformal_prediction_residuals(
258    fit: &FregreLmResult,
259    train_data: &FdMatrix,
260    train_y: &[f64],
261    test_data: &FdMatrix,
262    scalar_covariates_train: Option<&FdMatrix>,
263    scalar_covariates_test: Option<&FdMatrix>,
264    cal_fraction: f64,
265    alpha: f64,
266    seed: u64,
267) -> Result<ConformalPredictionResult, FdarError> {
268    let (n, m) = train_data.shape();
269    let (n_test, m_test) = test_data.shape();
270    let ncomp = fit.ncomp;
271    let (_n_cal, n_proper) = validate_conformal_inputs(
272        n,
273        m,
274        n_test,
275        m_test,
276        train_y.len(),
277        ncomp,
278        cal_fraction,
279        alpha,
280    )
281    .ok_or_else(|| FdarError::InvalidParameter {
282        parameter: "conformal_prediction_residuals",
283        message: "invalid input dimensions or parameters".into(),
284    })?;
285
286    // Random split
287    let mut rng = StdRng::seed_from_u64(seed);
288    let mut all_idx: Vec<usize> = (0..n).collect();
289    all_idx.shuffle(&mut rng);
290    let proper_idx = &all_idx[..n_proper];
291    let cal_idx = &all_idx[n_proper..];
292
293    // Subsample data
294    let proper_data = subsample_rows(train_data, proper_idx);
295    let proper_y: Vec<f64> = proper_idx.iter().map(|&i| train_y[i]).collect();
296    let proper_sc = scalar_covariates_train.map(|sc| subsample_rows(sc, proper_idx));
297
298    // Refit on proper-train
299    let refit = fregre_lm(&proper_data, &proper_y, proper_sc.as_ref(), ncomp)?;
300
301    // Predict on calibration set
302    let cal_data = subsample_rows(train_data, cal_idx);
303    let cal_sc = scalar_covariates_train.map(|sc| subsample_rows(sc, cal_idx));
304    let cal_scores = project_scores(
305        &cal_data,
306        &refit.fpca.mean,
307        &refit.fpca.rotation,
308        ncomp,
309        &refit.fpca.weights,
310    );
311    let cal_preds = predict_from_scores(
312        &cal_scores,
313        &refit.coefficients,
314        &refit.gamma,
315        cal_sc.as_ref(),
316        ncomp,
317    );
318    let cal_n = cal_idx.len();
319
320    let calibration_scores: Vec<f64> = cal_idx
321        .iter()
322        .enumerate()
323        .map(|(i, &orig)| (train_y[orig] - cal_preds[i]).abs())
324        .collect();
325
326    let (residual_quantile, coverage) =
327        conformal_quantile_and_coverage(&calibration_scores, cal_n, alpha);
328
329    // Predict on test data
330    let test_scores = project_scores(
331        test_data,
332        &refit.fpca.mean,
333        &refit.fpca.rotation,
334        ncomp,
335        &refit.fpca.weights,
336    );
337    let predictions = predict_from_scores(
338        &test_scores,
339        &refit.coefficients,
340        &refit.gamma,
341        scalar_covariates_test,
342        ncomp,
343    );
344
345    let lower: Vec<f64> = predictions.iter().map(|&p| p - residual_quantile).collect();
346    let upper: Vec<f64> = predictions.iter().map(|&p| p + residual_quantile).collect();
347
348    Ok(ConformalPredictionResult {
349        predictions,
350        lower,
351        upper,
352        residual_quantile,
353        coverage,
354        calibration_scores,
355    })
356}
357
358// ===========================================================================
359// Regression Depth
360// ===========================================================================
361
/// Type of functional depth measure for regression diagnostics.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum DepthType {
    /// Fraiman-Muniz depth.
    FraimanMuniz,
    /// Modified band depth.
    ModifiedBand,
    /// Functional spatial depth.
    FunctionalSpatial,
}
370
/// Result of regression depth analysis.
///
/// Produced by [`regression_depth`] and [`regression_depth_logistic`].
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub struct RegressionDepthResult {
    /// Depth of β̂ in the bootstrap coefficient distribution.
    pub beta_depth: f64,
    /// Depth of each observation's FPC scores (length n).
    pub score_depths: Vec<f64>,
    /// Mean of `score_depths`.
    pub mean_score_depth: f64,
    /// Depth method used.
    pub depth_type: DepthType,
    /// Number of successful bootstrap refits (at most n_boot).
    pub n_boot_success: usize,
}
386
387/// Regression depth diagnostics for a linear functional regression.
388///
389/// Computes depth of each observation's FPC scores and depth of the
390/// regression coefficients in a bootstrap distribution.
391///
392/// # Arguments
393/// * `fit` — Fitted [`FregreLmResult`]
394/// * `data` — Functional data (n × m)
395/// * `y` — Response (length n)
396/// * `scalar_covariates` — Optional scalar covariates
397/// * `n_boot` — Number of bootstrap iterations
398/// * `depth_type` — Which depth measure to use
399/// * `seed` — Random seed
400///
401/// # Errors
402///
403/// Returns [`FdarError::InvalidDimension`] if `data` has fewer than 4 rows,
404/// zero columns, or `y.len()` does not match the row count.
405/// Returns [`FdarError::InvalidParameter`] if `n_boot` is zero.
406/// Returns [`FdarError::ComputationFailed`] if score depth computation returns
407/// empty.
408#[must_use = "expensive computation whose result should not be discarded"]
409pub fn regression_depth(
410    fit: &FregreLmResult,
411    data: &FdMatrix,
412    y: &[f64],
413    scalar_covariates: Option<&FdMatrix>,
414    n_boot: usize,
415    depth_type: DepthType,
416    seed: u64,
417) -> Result<RegressionDepthResult, FdarError> {
418    let (n, m) = data.shape();
419    if n < 4 {
420        return Err(FdarError::InvalidDimension {
421            parameter: "data",
422            expected: ">=4 rows".into(),
423            actual: format!("{n}"),
424        });
425    }
426    if m == 0 {
427        return Err(FdarError::InvalidDimension {
428            parameter: "data",
429            expected: ">0 columns".into(),
430            actual: "0".into(),
431        });
432    }
433    if n != y.len() {
434        return Err(FdarError::InvalidDimension {
435            parameter: "y",
436            expected: format!("{n} (matching data rows)"),
437            actual: format!("{}", y.len()),
438        });
439    }
440    if n_boot == 0 {
441        return Err(FdarError::InvalidParameter {
442            parameter: "n_boot",
443            message: "must be > 0".into(),
444        });
445    }
446    let ncomp = fit.ncomp;
447    let scores = project_scores(
448        data,
449        &fit.fpca.mean,
450        &fit.fpca.rotation,
451        ncomp,
452        &fit.fpca.weights,
453    );
454    let score_depths = compute_score_depths(&scores, depth_type);
455    if score_depths.is_empty() {
456        return Err(FdarError::ComputationFailed {
457            operation: "regression_depth",
458            detail: "score depth computation returned empty; try increasing ncomp or check that the score matrix has sufficient variability".into(),
459        });
460    }
461    let mean_score_depth = score_depths.iter().sum::<f64>() / score_depths.len() as f64;
462
463    let orig_coefs: Vec<f64> = (0..ncomp).map(|k| fit.coefficients[1 + k]).collect();
464    let mut rng = StdRng::seed_from_u64(seed);
465    let mut boot_coefs = Vec::new();
466    for _ in 0..n_boot {
467        let idx: Vec<usize> = (0..n).map(|_| rng.gen_range(0..n)).collect();
468        let boot_data = subsample_rows(data, &idx);
469        let boot_y: Vec<f64> = idx.iter().map(|&i| y[i]).collect();
470        let boot_sc = scalar_covariates.map(|sc| subsample_rows(sc, &idx));
471        if let Ok(refit) = fregre_lm(&boot_data, &boot_y, boot_sc.as_ref(), ncomp) {
472            boot_coefs.push((0..ncomp).map(|k| refit.coefficients[1 + k]).collect());
473        }
474    }
475
476    let beta_depth = beta_depth_from_bootstrap(&boot_coefs, &orig_coefs, ncomp, depth_type);
477
478    Ok(RegressionDepthResult {
479        beta_depth,
480        score_depths,
481        mean_score_depth,
482        depth_type,
483        n_boot_success: boot_coefs.len(),
484    })
485}
486
487/// Regression depth diagnostics for a functional logistic regression.
488///
489/// # Errors
490///
491/// Returns [`FdarError::InvalidDimension`] if `data` has fewer than 4 rows,
492/// zero columns, or `y.len()` does not match the row count.
493/// Returns [`FdarError::InvalidParameter`] if `n_boot` is zero.
494/// Returns [`FdarError::ComputationFailed`] if score depth computation returns
495/// empty.
496#[must_use = "expensive computation whose result should not be discarded"]
497pub fn regression_depth_logistic(
498    fit: &FunctionalLogisticResult,
499    data: &FdMatrix,
500    y: &[f64],
501    scalar_covariates: Option<&FdMatrix>,
502    n_boot: usize,
503    depth_type: DepthType,
504    seed: u64,
505) -> Result<RegressionDepthResult, FdarError> {
506    let (n, m) = data.shape();
507    if n < 4 {
508        return Err(FdarError::InvalidDimension {
509            parameter: "data",
510            expected: ">=4 rows".into(),
511            actual: format!("{n}"),
512        });
513    }
514    if m == 0 {
515        return Err(FdarError::InvalidDimension {
516            parameter: "data",
517            expected: ">0 columns".into(),
518            actual: "0".into(),
519        });
520    }
521    if n != y.len() {
522        return Err(FdarError::InvalidDimension {
523            parameter: "y",
524            expected: format!("{n} (matching data rows)"),
525            actual: format!("{}", y.len()),
526        });
527    }
528    if n_boot == 0 {
529        return Err(FdarError::InvalidParameter {
530            parameter: "n_boot",
531            message: "must be > 0".into(),
532        });
533    }
534    let ncomp = fit.ncomp;
535    let scores = project_scores(
536        data,
537        &fit.fpca.mean,
538        &fit.fpca.rotation,
539        ncomp,
540        &fit.fpca.weights,
541    );
542    let score_depths = compute_score_depths(&scores, depth_type);
543    if score_depths.is_empty() {
544        return Err(FdarError::ComputationFailed {
545            operation: "regression_depth_logistic",
546            detail: "score depth computation returned empty; try increasing ncomp or check that the score matrix has sufficient variability".into(),
547        });
548    }
549    let mean_score_depth = score_depths.iter().sum::<f64>() / score_depths.len() as f64;
550
551    let orig_coefs: Vec<f64> = (0..ncomp).map(|k| fit.coefficients[1 + k]).collect();
552    let mut rng = StdRng::seed_from_u64(seed);
553    let boot_coefs =
554        bootstrap_logistic_coefs(data, y, scalar_covariates, n, ncomp, n_boot, &mut rng);
555
556    let beta_depth = beta_depth_from_bootstrap(&boot_coefs, &orig_coefs, ncomp, depth_type);
557
558    Ok(RegressionDepthResult {
559        beta_depth,
560        score_depths,
561        mean_score_depth,
562        depth_type,
563        n_boot_success: boot_coefs.len(),
564    })
565}
566
567// ===========================================================================
568// Stability / Robustness Analysis
569// ===========================================================================
570
/// Result of bootstrap stability analysis.
///
/// Produced by [`explanation_stability`] and [`explanation_stability_logistic`].
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub struct StabilityAnalysisResult {
    /// Pointwise std of β(t) across bootstraps (length m).
    pub beta_t_std: Vec<f64>,
    /// Std of FPC coefficients γ_k across bootstraps (length ncomp).
    pub coefficient_std: Vec<f64>,
    /// Std of R² or accuracy across bootstraps.
    pub metric_std: f64,
    /// Coefficient of variation of β(t): std / |mean| (length m).
    pub beta_t_cv: Vec<f64>,
    /// Mean Spearman rank correlation of FPC importance rankings.
    pub importance_stability: f64,
    /// Number of successful bootstrap refits (at most n_boot).
    pub n_boot_success: usize,
}
588
589/// Bootstrap stability analysis of a linear functional regression.
590///
591/// Refits the model on `n_boot` bootstrap samples and reports variability
592/// of β(t), FPC coefficients, R², and importance rankings.
593///
594/// # Errors
595///
596/// Returns [`FdarError::InvalidDimension`] if `data` has fewer than 4 rows,
597/// zero columns, or `y.len()` does not match the row count.
598/// Returns [`FdarError::InvalidParameter`] if `n_boot < 2` or `ncomp` is zero.
599/// Returns [`FdarError::ComputationFailed`] if there are insufficient successful
600/// bootstrap refits.
601#[must_use = "expensive computation whose result should not be discarded"]
602pub fn explanation_stability(
603    data: &FdMatrix,
604    y: &[f64],
605    scalar_covariates: Option<&FdMatrix>,
606    ncomp: usize,
607    n_boot: usize,
608    seed: u64,
609) -> Result<StabilityAnalysisResult, FdarError> {
610    let (n, m) = data.shape();
611    if n < 4 {
612        return Err(FdarError::InvalidDimension {
613            parameter: "data",
614            expected: ">=4 rows".into(),
615            actual: format!("{n}"),
616        });
617    }
618    if m == 0 {
619        return Err(FdarError::InvalidDimension {
620            parameter: "data",
621            expected: ">0 columns".into(),
622            actual: "0".into(),
623        });
624    }
625    if n != y.len() {
626        return Err(FdarError::InvalidDimension {
627            parameter: "y",
628            expected: format!("{n} (matching data rows)"),
629            actual: format!("{}", y.len()),
630        });
631    }
632    if n_boot < 2 {
633        return Err(FdarError::InvalidParameter {
634            parameter: "n_boot",
635            message: "must be >= 2".into(),
636        });
637    }
638    if ncomp == 0 {
639        return Err(FdarError::InvalidParameter {
640            parameter: "ncomp",
641            message: "must be > 0".into(),
642        });
643    }
644
645    let mut rng = StdRng::seed_from_u64(seed);
646    let mut all_beta_t: Vec<Vec<f64>> = Vec::new();
647    let mut all_coefs: Vec<Vec<f64>> = Vec::new();
648    let mut all_metrics: Vec<f64> = Vec::new();
649    let mut all_abs_coefs: Vec<Vec<f64>> = Vec::new();
650
651    for _ in 0..n_boot {
652        let idx: Vec<usize> = (0..n).map(|_| rng.gen_range(0..n)).collect();
653        let boot_data = subsample_rows(data, &idx);
654        let boot_y: Vec<f64> = idx.iter().map(|&i| y[i]).collect();
655        let boot_sc = scalar_covariates.map(|sc| subsample_rows(sc, &idx));
656        if let Ok(refit) = fregre_lm(&boot_data, &boot_y, boot_sc.as_ref(), ncomp) {
657            all_beta_t.push(refit.beta_t.clone());
658            let coefs: Vec<f64> = (0..ncomp).map(|k| refit.coefficients[1 + k]).collect();
659            all_abs_coefs.push(coefs.iter().map(|c| c.abs()).collect());
660            all_coefs.push(coefs);
661            all_metrics.push(refit.r_squared);
662        }
663    }
664
665    build_stability_result(
666        &all_beta_t,
667        &all_coefs,
668        &all_abs_coefs,
669        &all_metrics,
670        m,
671        ncomp,
672    )
673    .ok_or_else(|| FdarError::ComputationFailed {
674        operation: "explanation_stability",
675        detail: "insufficient successful bootstrap refits; try increasing n_boot or check that the model fits reliably on subsampled data".into(),
676    })
677}
678
679/// Bootstrap stability analysis of a functional logistic regression.
680///
681/// # Errors
682///
683/// Returns [`FdarError::InvalidDimension`] if `data` has fewer than 4 rows,
684/// zero columns, or `y.len()` does not match the row count.
685/// Returns [`FdarError::InvalidParameter`] if `n_boot < 2` or `ncomp` is zero.
686/// Returns [`FdarError::ComputationFailed`] if there are insufficient successful
687/// bootstrap refits.
688#[must_use = "expensive computation whose result should not be discarded"]
689pub fn explanation_stability_logistic(
690    data: &FdMatrix,
691    y: &[f64],
692    scalar_covariates: Option<&FdMatrix>,
693    ncomp: usize,
694    n_boot: usize,
695    seed: u64,
696) -> Result<StabilityAnalysisResult, FdarError> {
697    let (n, m) = data.shape();
698    if n < 4 {
699        return Err(FdarError::InvalidDimension {
700            parameter: "data",
701            expected: ">=4 rows".into(),
702            actual: format!("{n}"),
703        });
704    }
705    if m == 0 {
706        return Err(FdarError::InvalidDimension {
707            parameter: "data",
708            expected: ">0 columns".into(),
709            actual: "0".into(),
710        });
711    }
712    if n != y.len() {
713        return Err(FdarError::InvalidDimension {
714            parameter: "y",
715            expected: format!("{n} (matching data rows)"),
716            actual: format!("{}", y.len()),
717        });
718    }
719    if n_boot < 2 {
720        return Err(FdarError::InvalidParameter {
721            parameter: "n_boot",
722            message: "must be >= 2".into(),
723        });
724    }
725    if ncomp == 0 {
726        return Err(FdarError::InvalidParameter {
727            parameter: "ncomp",
728            message: "must be > 0".into(),
729        });
730    }
731
732    let mut rng = StdRng::seed_from_u64(seed);
733    let (all_beta_t, all_coefs, all_abs_coefs, all_metrics) =
734        bootstrap_logistic_stability(data, y, scalar_covariates, n, ncomp, n_boot, &mut rng);
735
736    build_stability_result(
737        &all_beta_t,
738        &all_coefs,
739        &all_abs_coefs,
740        &all_metrics,
741        m,
742        ncomp,
743    )
744    .ok_or_else(|| FdarError::ComputationFailed {
745        operation: "explanation_stability_logistic",
746        detail: "insufficient successful bootstrap refits; try increasing n_boot or check that the model fits reliably on subsampled data".into(),
747    })
748}
749
750// ===========================================================================
751// Anchors / Rule Extraction
752// ===========================================================================
753
/// A single condition in an anchor rule.
///
/// Constrains one FPC score to an interval; exact bound inclusivity is
/// determined by `anchor_beam_search` — TODO confirm against the helper.
#[derive(Debug, Clone, PartialEq)]
pub struct AnchorCondition {
    /// FPC component index.
    pub component: usize,
    /// Lower bound on FPC score.
    pub lower_bound: f64,
    /// Upper bound on FPC score.
    pub upper_bound: f64,
}
764
/// An anchor rule consisting of FPC score conditions.
///
/// All conditions must hold simultaneously (conjunction) for an observation
/// to match the rule.
#[derive(Debug, Clone, PartialEq)]
pub struct AnchorRule {
    /// Conditions forming the rule (conjunction).
    pub conditions: Vec<AnchorCondition>,
    /// Precision: fraction of matching observations with the same prediction.
    pub precision: f64,
    /// Coverage: fraction of all observations matching the rule.
    pub coverage: f64,
    /// Number of observations matching the rule.
    pub n_matching: usize,
}
777
/// Result of anchor explanation for one observation.
///
/// Produced by [`anchor_explanation`] and [`anchor_explanation_logistic`].
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub struct AnchorResult {
    /// The anchor rule.
    pub rule: AnchorRule,
    /// Which observation was explained.
    pub observation: usize,
    /// Predicted value for the observation.
    pub predicted_value: f64,
}
789
790/// Anchor explanation for a linear functional regression.
791///
792/// Uses beam search in FPC score space to find a minimal set of conditions
793/// (score bin memberships) that locally "anchor" the prediction.
794///
795/// # Arguments
796/// * `fit` — Fitted [`FregreLmResult`]
797/// * `data` — Functional data (n × m)
798/// * `scalar_covariates` — Optional scalar covariates
799/// * `observation` — Index of observation to explain
800/// * `precision_threshold` — Minimum precision (e.g. 0.95)
801/// * `n_bins` — Number of quantile bins per FPC dimension
802///
803/// # Errors
804///
805/// Returns [`FdarError::InvalidDimension`] if `data` has zero rows or its column
806/// count does not match `fit.fpca.mean`.
807/// Returns [`FdarError::InvalidParameter`] if `observation >= n` or `n_bins < 2`.
808#[must_use = "expensive computation whose result should not be discarded"]
809pub fn anchor_explanation(
810    fit: &FregreLmResult,
811    data: &FdMatrix,
812    scalar_covariates: Option<&FdMatrix>,
813    observation: usize,
814    precision_threshold: f64,
815    n_bins: usize,
816) -> Result<AnchorResult, FdarError> {
817    let (n, m) = data.shape();
818    if n == 0 {
819        return Err(FdarError::InvalidDimension {
820            parameter: "data",
821            expected: ">0 rows".into(),
822            actual: "0".into(),
823        });
824    }
825    if m != fit.fpca.mean.len() {
826        return Err(FdarError::InvalidDimension {
827            parameter: "data",
828            expected: format!("{} columns", fit.fpca.mean.len()),
829            actual: format!("{m}"),
830        });
831    }
832    if observation >= n {
833        return Err(FdarError::InvalidParameter {
834            parameter: "observation",
835            message: format!("observation {observation} >= n {n}"),
836        });
837    }
838    if n_bins < 2 {
839        return Err(FdarError::InvalidParameter {
840            parameter: "n_bins",
841            message: "must be >= 2".into(),
842        });
843    }
844    let ncomp = fit.ncomp;
845    let scores = project_scores(
846        data,
847        &fit.fpca.mean,
848        &fit.fpca.rotation,
849        ncomp,
850        &fit.fpca.weights,
851    );
852    let obs_pred = fit.fitted_values[observation];
853    let tol = fit.residual_se;
854
855    // "Same prediction" for regression: within ±1 residual_se
856    let same_pred = |i: usize| -> bool {
857        let mut yhat = fit.coefficients[0];
858        for k in 0..ncomp {
859            yhat += fit.coefficients[1 + k] * scores[(i, k)];
860        }
861        if let Some(sc) = scalar_covariates {
862            for j in 0..fit.gamma.len() {
863                yhat += fit.gamma[j] * sc[(i, j)];
864            }
865        }
866        (yhat - obs_pred).abs() <= tol
867    };
868
869    let (rule, _) = anchor_beam_search(
870        &scores,
871        ncomp,
872        n,
873        observation,
874        precision_threshold,
875        n_bins,
876        &same_pred,
877    );
878
879    Ok(AnchorResult {
880        rule,
881        observation,
882        predicted_value: obs_pred,
883    })
884}
885
886/// Anchor explanation for a functional logistic regression.
887///
888/// "Same prediction" = same predicted class.
889///
890/// # Errors
891///
892/// Returns [`FdarError::InvalidDimension`] if `data` has zero rows or its column
893/// count does not match `fit.fpca.mean`.
894/// Returns [`FdarError::InvalidParameter`] if `observation >= n` or `n_bins < 2`.
895#[must_use = "expensive computation whose result should not be discarded"]
896pub fn anchor_explanation_logistic(
897    fit: &FunctionalLogisticResult,
898    data: &FdMatrix,
899    scalar_covariates: Option<&FdMatrix>,
900    observation: usize,
901    precision_threshold: f64,
902    n_bins: usize,
903) -> Result<AnchorResult, FdarError> {
904    let (n, m) = data.shape();
905    if n == 0 {
906        return Err(FdarError::InvalidDimension {
907            parameter: "data",
908            expected: ">0 rows".into(),
909            actual: "0".into(),
910        });
911    }
912    if m != fit.fpca.mean.len() {
913        return Err(FdarError::InvalidDimension {
914            parameter: "data",
915            expected: format!("{} columns", fit.fpca.mean.len()),
916            actual: format!("{m}"),
917        });
918    }
919    if observation >= n {
920        return Err(FdarError::InvalidParameter {
921            parameter: "observation",
922            message: format!("observation {observation} >= n {n}"),
923        });
924    }
925    if n_bins < 2 {
926        return Err(FdarError::InvalidParameter {
927            parameter: "n_bins",
928            message: "must be >= 2".into(),
929        });
930    }
931    let ncomp = fit.ncomp;
932    let scores = project_scores(
933        data,
934        &fit.fpca.mean,
935        &fit.fpca.rotation,
936        ncomp,
937        &fit.fpca.weights,
938    );
939    let obs_class = fit.predicted_classes[observation];
940    let obs_prob = fit.probabilities[observation];
941    let p_scalar = fit.gamma.len();
942
943    // "Same prediction" = same class
944    let same_pred = |i: usize| -> bool {
945        let mut eta = fit.intercept;
946        for k in 0..ncomp {
947            eta += fit.coefficients[1 + k] * scores[(i, k)];
948        }
949        if let Some(sc) = scalar_covariates {
950            for j in 0..p_scalar {
951                eta += fit.gamma[j] * sc[(i, j)];
952            }
953        }
954        let pred_class = usize::from(sigmoid(eta) >= 0.5);
955        pred_class == obs_class
956    };
957
958    let (rule, _) = anchor_beam_search(
959        &scores,
960        ncomp,
961        n,
962        observation,
963        precision_threshold,
964        n_bins,
965        &same_pred,
966    );
967
968    Ok(AnchorResult {
969        rule,
970        observation,
971        predicted_value: obs_prob,
972    })
973}
974
975// ---------------------------------------------------------------------------
976// Private helpers
977// ---------------------------------------------------------------------------
978
/// Hosmer-Lemeshow computation.
///
/// Sorts observations by predicted probability, partitions them into
/// `n_groups` near-equal groups (the first `n % n_groups` groups absorb one
/// extra element each), and returns the HL chi² statistic together with
/// per-group `(mean_predicted, mean_observed)` pairs and group sizes.
fn hosmer_lemeshow_computation(
    probabilities: &[f64],
    y: &[f64],
    n: usize,
    n_groups: usize,
) -> (f64, Vec<(f64, f64)>, Vec<usize>) {
    // Rank observations by predicted probability; incomparable values
    // (NaN) are treated as equal so the stable sort keeps their order.
    let mut order: Vec<usize> = (0..n).collect();
    order.sort_by(|&a, &b| {
        probabilities[a]
            .partial_cmp(&probabilities[b])
            .unwrap_or(std::cmp::Ordering::Equal)
    });

    let base = n / n_groups;
    let extra = n % n_groups;

    let mut chi2 = 0.0;
    let mut reliability_bins = Vec::with_capacity(n_groups);
    let mut bin_counts = Vec::with_capacity(n_groups);
    let mut cursor = 0;

    for g in 0..n_groups {
        let len = base + usize::from(g < extra);
        let members = &order[cursor..cursor + len];
        cursor += len;

        // Groups can be empty when n < n_groups.
        if members.is_empty() {
            continue;
        }
        let observed: f64 = members.iter().map(|&i| y[i]).sum();
        let expected: f64 = members.iter().map(|&i| probabilities[i]).sum();
        let size = members.len() as f64;
        let mean_pred = expected / size;
        let mean_obs = observed / size;

        reliability_bins.push((mean_pred, mean_obs));
        bin_counts.push(members.len());

        // Skip degenerate groups (all predictions 0 or 1) to avoid 0/0.
        let variance = size * mean_pred * (1.0 - mean_pred);
        if variance > 1e-15 {
            chi2 += (observed - expected).powi(2) / variance;
        }
    }

    (chi2, reliability_bins, bin_counts)
}
1026
/// Compute equal-width ECE, MCE, and per-bin contributions.
///
/// Probabilities are histogrammed into `n_bins` equal-width bins on [0, 1]
/// (p = 1.0 is clamped into the last bin). Each non-empty bin's calibration
/// gap is |mean(y) - mean(p)|; ECE is the count-weighted sum of the gaps and
/// MCE the largest single gap.
fn compute_equal_width_ece(
    probabilities: &[f64],
    y: &[f64],
    n: usize,
    n_bins: usize,
) -> (f64, f64, Vec<f64>) {
    let mut obs_total = vec![0.0; n_bins];
    let mut pred_total = vec![0.0; n_bins];
    let mut counts = vec![0usize; n_bins];

    // Histogram pass over the first n observations.
    for (&p, &obs) in probabilities[..n].iter().zip(&y[..n]) {
        let bin = ((p * n_bins as f64).floor() as usize).min(n_bins - 1);
        obs_total[bin] += obs;
        pred_total[bin] += p;
        counts[bin] += 1;
    }

    let mut ece = 0.0;
    let mut mce: f64 = 0.0;
    let mut bin_ece_contributions = vec![0.0; n_bins];

    for bin in 0..n_bins {
        let c = counts[bin];
        if c == 0 {
            continue;
        }
        let gap = (obs_total[bin] / c as f64 - pred_total[bin] / c as f64).abs();
        // Weight each gap by the fraction of observations in the bin.
        let weighted = c as f64 / n as f64 * gap;
        bin_ece_contributions[bin] = weighted;
        ece += weighted;
        mce = mce.max(gap);
    }

    (ece, mce, bin_ece_contributions)
}
1064
1065/// Bootstrap logistic stability: collect beta_t, coefs, abs_coefs, and metrics.
1066fn bootstrap_logistic_stability(
1067    data: &FdMatrix,
1068    y: &[f64],
1069    scalar_covariates: Option<&FdMatrix>,
1070    n: usize,
1071    ncomp: usize,
1072    n_boot: usize,
1073    rng: &mut StdRng,
1074) -> (Vec<Vec<f64>>, Vec<Vec<f64>>, Vec<Vec<f64>>, Vec<f64>) {
1075    let mut all_beta_t: Vec<Vec<f64>> = Vec::new();
1076    let mut all_coefs: Vec<Vec<f64>> = Vec::new();
1077    let mut all_abs_coefs: Vec<Vec<f64>> = Vec::new();
1078    let mut all_metrics: Vec<f64> = Vec::new();
1079
1080    for _ in 0..n_boot {
1081        let idx: Vec<usize> = (0..n).map(|_| rng.gen_range(0..n)).collect();
1082        let boot_data = subsample_rows(data, &idx);
1083        let boot_y: Vec<f64> = idx.iter().map(|&i| y[i]).collect();
1084        let boot_sc = scalar_covariates.map(|sc| subsample_rows(sc, &idx));
1085        let has_both = boot_y.iter().any(|&v| v < 0.5) && boot_y.iter().any(|&v| v >= 0.5);
1086        if !has_both {
1087            continue;
1088        }
1089        if let Ok(refit) =
1090            functional_logistic(&boot_data, &boot_y, boot_sc.as_ref(), ncomp, 25, 1e-6)
1091        {
1092            all_beta_t.push(refit.beta_t.clone());
1093            let coefs: Vec<f64> = (0..ncomp).map(|k| refit.coefficients[1 + k]).collect();
1094            all_abs_coefs.push(coefs.iter().map(|c| c.abs()).collect());
1095            all_coefs.push(coefs);
1096            all_metrics.push(refit.accuracy);
1097        }
1098    }
1099
1100    (all_beta_t, all_coefs, all_abs_coefs, all_metrics)
1101}
1102
1103/// Bootstrap logistic coefficients for regression depth.
1104fn bootstrap_logistic_coefs(
1105    data: &FdMatrix,
1106    y: &[f64],
1107    scalar_covariates: Option<&FdMatrix>,
1108    n: usize,
1109    ncomp: usize,
1110    n_boot: usize,
1111    rng: &mut StdRng,
1112) -> Vec<Vec<f64>> {
1113    let mut boot_coefs = Vec::new();
1114    for _ in 0..n_boot {
1115        let idx: Vec<usize> = (0..n).map(|_| rng.gen_range(0..n)).collect();
1116        let boot_data = subsample_rows(data, &idx);
1117        let boot_y: Vec<f64> = idx.iter().map(|&i| y[i]).collect();
1118        let boot_sc = scalar_covariates.map(|sc| subsample_rows(sc, &idx));
1119        let has_both = boot_y.iter().any(|&v| v < 0.5) && boot_y.iter().any(|&v| v >= 0.5);
1120        if !has_both {
1121            continue;
1122        }
1123        if let Ok(refit) =
1124            functional_logistic(&boot_data, &boot_y, boot_sc.as_ref(), ncomp, 25, 1e-6)
1125        {
1126            boot_coefs.push((0..ncomp).map(|k| refit.coefficients[1 + k]).collect());
1127        }
1128    }
1129    boot_coefs
1130}