// fdars_core/explain/advanced.rs
1//! Calibration, conformal prediction, regression depth, stability, and anchors.
2
3use super::helpers::*;
4use crate::error::FdarError;
5use crate::matrix::FdMatrix;
6use crate::scalar_on_function::{
7    fregre_lm, functional_logistic, sigmoid, FregreLmResult, FunctionalLogisticResult,
8};
9use rand::prelude::*;
10
11// ===========================================================================
12// Calibration Diagnostics (logistic only)
13// ===========================================================================
14
15/// Calibration diagnostics for a functional logistic regression model.
16#[derive(Debug, Clone, PartialEq)]
17pub struct CalibrationDiagnosticsResult {
18    /// Brier score: (1/n) Σ (p_i - y_i)².
19    pub brier_score: f64,
20    /// Log loss: -(1/n) Σ [y log p + (1-y) log(1-p)].
21    pub log_loss: f64,
22    /// Hosmer-Lemeshow chi² statistic.
23    pub hosmer_lemeshow_chi2: f64,
24    /// Degrees of freedom: n_groups - 2.
25    pub hosmer_lemeshow_df: usize,
26    /// Number of calibration groups.
27    pub n_groups: usize,
28    /// Reliability bins: (mean_predicted, mean_observed) per group.
29    pub reliability_bins: Vec<(f64, f64)>,
30    /// Number of observations in each group.
31    pub bin_counts: Vec<usize>,
32}
33
34/// Calibration diagnostics for a functional logistic regression model.
35///
36/// # Errors
37///
38/// Returns [`FdarError::InvalidDimension`] if `fit.probabilities` is empty or
39/// `y.len()` does not match the number of probabilities.
40/// Returns [`FdarError::InvalidParameter`] if `n_groups < 2`.
41#[must_use = "expensive computation whose result should not be discarded"]
42pub fn calibration_diagnostics(
43    fit: &FunctionalLogisticResult,
44    y: &[f64],
45    n_groups: usize,
46) -> Result<CalibrationDiagnosticsResult, FdarError> {
47    let n = fit.probabilities.len();
48    if n == 0 {
49        return Err(FdarError::InvalidDimension {
50            parameter: "probabilities",
51            expected: ">0 length".into(),
52            actual: "0".into(),
53        });
54    }
55    if n != y.len() {
56        return Err(FdarError::InvalidDimension {
57            parameter: "y",
58            expected: format!("{n} (matching probabilities)"),
59            actual: format!("{}", y.len()),
60        });
61    }
62    if n_groups < 2 {
63        return Err(FdarError::InvalidParameter {
64            parameter: "n_groups",
65            message: "must be >= 2".into(),
66        });
67    }
68
69    // Brier score
70    let brier_score: f64 = fit
71        .probabilities
72        .iter()
73        .zip(y)
74        .map(|(&p, &yi)| (p - yi).powi(2))
75        .sum::<f64>()
76        / n as f64;
77
78    // Log loss
79    let log_loss: f64 = -fit
80        .probabilities
81        .iter()
82        .zip(y)
83        .map(|(&p, &yi)| {
84            let p_clip = p.clamp(1e-15, 1.0 - 1e-15);
85            yi * p_clip.ln() + (1.0 - yi) * (1.0 - p_clip).ln()
86        })
87        .sum::<f64>()
88        / n as f64;
89
90    let (hosmer_lemeshow_chi2, reliability_bins, bin_counts) =
91        hosmer_lemeshow_computation(&fit.probabilities, y, n, n_groups);
92
93    let actual_groups = bin_counts.len();
94    let hosmer_lemeshow_df = if actual_groups > 2 {
95        actual_groups - 2
96    } else {
97        1
98    };
99
100    Ok(CalibrationDiagnosticsResult {
101        brier_score,
102        log_loss,
103        hosmer_lemeshow_chi2,
104        hosmer_lemeshow_df,
105        n_groups: actual_groups,
106        reliability_bins,
107        bin_counts,
108    })
109}
110
// ===========================================================================
// Expected Calibration Error (ECE)
// ===========================================================================

/// Result of expected calibration error analysis.
///
/// Produced by [`expected_calibration_error`].
#[derive(Debug, Clone, PartialEq)]
pub struct EceResult {
    /// Expected calibration error: Σ (n_b/n) |acc_b - conf_b|.
    pub ece: f64,
    /// Maximum calibration error: max |acc_b - conf_b|.
    pub mce: f64,
    /// Adaptive calibration error (equal-mass bins).
    pub ace: f64,
    /// Number of bins used.
    pub n_bins: usize,
    /// Per-bin ECE contributions (length n_bins).
    pub bin_ece_contributions: Vec<f64>,
}
130/// Compute expected, maximum, and adaptive calibration errors for a logistic model.
131///
132/// # Arguments
133/// * `fit` — A fitted [`FunctionalLogisticResult`]
134/// * `y` — Binary labels (0/1), length n
135/// * `n_bins` — Number of bins for equal-width binning
136///
137/// # Errors
138///
139/// Returns [`FdarError::InvalidDimension`] if `fit.probabilities` is empty or
140/// `y.len()` does not match the number of probabilities.
141/// Returns [`FdarError::InvalidParameter`] if `n_bins` is zero.
142#[must_use = "expensive computation whose result should not be discarded"]
143pub fn expected_calibration_error(
144    fit: &FunctionalLogisticResult,
145    y: &[f64],
146    n_bins: usize,
147) -> Result<EceResult, FdarError> {
148    let n = fit.probabilities.len();
149    if n == 0 {
150        return Err(FdarError::InvalidDimension {
151            parameter: "probabilities",
152            expected: ">0 length".into(),
153            actual: "0".into(),
154        });
155    }
156    if n != y.len() {
157        return Err(FdarError::InvalidDimension {
158            parameter: "y",
159            expected: format!("{n} (matching probabilities)"),
160            actual: format!("{}", y.len()),
161        });
162    }
163    if n_bins == 0 {
164        return Err(FdarError::InvalidParameter {
165            parameter: "n_bins",
166            message: "must be > 0".into(),
167        });
168    }
169
170    let (ece, mce, bin_ece_contributions) =
171        compute_equal_width_ece(&fit.probabilities, y, n, n_bins);
172
173    // ACE: equal-mass (quantile) bins
174    let mut sorted_idx: Vec<usize> = (0..n).collect();
175    sorted_idx.sort_by(|&a, &b| {
176        fit.probabilities[a]
177            .partial_cmp(&fit.probabilities[b])
178            .unwrap_or(std::cmp::Ordering::Equal)
179    });
180    let group_size = n / n_bins.max(1);
181    let mut ace = 0.0;
182    let mut start = 0;
183    for g in 0..n_bins {
184        if start >= n {
185            break;
186        }
187        let end = if g < n_bins - 1 {
188            (start + group_size).min(n)
189        } else {
190            n
191        };
192        ace += calibration_gap_weighted(&sorted_idx[start..end], y, &fit.probabilities, n);
193        start = end;
194    }
195
196    Ok(EceResult {
197        ece,
198        mce,
199        ace,
200        n_bins,
201        bin_ece_contributions,
202    })
203}
204
// ===========================================================================
// Conformal Prediction Residuals
// ===========================================================================

/// Result of split-conformal prediction.
///
/// Produced by [`conformal_prediction_residuals`].
#[derive(Debug, Clone, PartialEq)]
pub struct ConformalPredictionResult {
    /// Predictions on test data (length n_test).
    pub predictions: Vec<f64>,
    /// Lower bounds of prediction intervals (length n_test).
    pub lower: Vec<f64>,
    /// Upper bounds of prediction intervals (length n_test).
    pub upper: Vec<f64>,
    /// Quantile of calibration residuals.
    pub residual_quantile: f64,
    /// Empirical coverage on the calibration set.
    pub coverage: f64,
    /// Absolute residuals on calibration set.
    pub calibration_scores: Vec<f64>,
}
226/// Split-conformal prediction intervals for a linear functional regression.
227///
228/// Randomly splits training data into proper-train and calibration subsets,
229/// refits the model, and constructs distribution-free prediction intervals.
230///
231/// # Arguments
232/// * `fit` — Original [`FregreLmResult`] (used for ncomp)
233/// * `train_data` — Training functional data (n × m)
234/// * `train_y` — Training response (length n)
235/// * `test_data` — Test functional data (n_test × m)
236/// * `scalar_covariates_train` — Optional scalar covariates for training
237/// * `scalar_covariates_test` — Optional scalar covariates for test
238/// * `cal_fraction` — Fraction of training data for calibration (0, 1)
239/// * `alpha` — Miscoverage level (e.g. 0.1 for 90% intervals)
240/// * `seed` — Random seed
241///
242/// # Errors
243///
244/// Returns [`FdarError::InvalidParameter`] if input dimensions or parameters
245/// are invalid (e.g., mismatched columns between train and test, `cal_fraction`
246/// outside (0, 1), `alpha` outside (0, 1), or too few training observations).
247/// May also propagate errors from [`crate::scalar_on_function::fregre_lm`]
248/// when refitting on the proper-training subset.
249#[must_use = "expensive computation whose result should not be discarded"]
250pub fn conformal_prediction_residuals(
251    fit: &FregreLmResult,
252    train_data: &FdMatrix,
253    train_y: &[f64],
254    test_data: &FdMatrix,
255    scalar_covariates_train: Option<&FdMatrix>,
256    scalar_covariates_test: Option<&FdMatrix>,
257    cal_fraction: f64,
258    alpha: f64,
259    seed: u64,
260) -> Result<ConformalPredictionResult, FdarError> {
261    let (n, m) = train_data.shape();
262    let (n_test, m_test) = test_data.shape();
263    let ncomp = fit.ncomp;
264    let (_n_cal, n_proper) = validate_conformal_inputs(
265        n,
266        m,
267        n_test,
268        m_test,
269        train_y.len(),
270        ncomp,
271        cal_fraction,
272        alpha,
273    )
274    .ok_or_else(|| FdarError::InvalidParameter {
275        parameter: "conformal_prediction_residuals",
276        message: "invalid input dimensions or parameters".into(),
277    })?;
278
279    // Random split
280    let mut rng = StdRng::seed_from_u64(seed);
281    let mut all_idx: Vec<usize> = (0..n).collect();
282    all_idx.shuffle(&mut rng);
283    let proper_idx = &all_idx[..n_proper];
284    let cal_idx = &all_idx[n_proper..];
285
286    // Subsample data
287    let proper_data = subsample_rows(train_data, proper_idx);
288    let proper_y: Vec<f64> = proper_idx.iter().map(|&i| train_y[i]).collect();
289    let proper_sc = scalar_covariates_train.map(|sc| subsample_rows(sc, proper_idx));
290
291    // Refit on proper-train
292    let refit = fregre_lm(&proper_data, &proper_y, proper_sc.as_ref(), ncomp)?;
293
294    // Predict on calibration set
295    let cal_data = subsample_rows(train_data, cal_idx);
296    let cal_sc = scalar_covariates_train.map(|sc| subsample_rows(sc, cal_idx));
297    let cal_scores = project_scores(&cal_data, &refit.fpca.mean, &refit.fpca.rotation, ncomp);
298    let cal_preds = predict_from_scores(
299        &cal_scores,
300        &refit.coefficients,
301        &refit.gamma,
302        cal_sc.as_ref(),
303        ncomp,
304    );
305    let cal_n = cal_idx.len();
306
307    let calibration_scores: Vec<f64> = cal_idx
308        .iter()
309        .enumerate()
310        .map(|(i, &orig)| (train_y[orig] - cal_preds[i]).abs())
311        .collect();
312
313    let (residual_quantile, coverage) =
314        conformal_quantile_and_coverage(&calibration_scores, cal_n, alpha);
315
316    // Predict on test data
317    let test_scores = project_scores(test_data, &refit.fpca.mean, &refit.fpca.rotation, ncomp);
318    let predictions = predict_from_scores(
319        &test_scores,
320        &refit.coefficients,
321        &refit.gamma,
322        scalar_covariates_test,
323        ncomp,
324    );
325
326    let lower: Vec<f64> = predictions.iter().map(|&p| p - residual_quantile).collect();
327    let upper: Vec<f64> = predictions.iter().map(|&p| p + residual_quantile).collect();
328
329    Ok(ConformalPredictionResult {
330        predictions,
331        lower,
332        upper,
333        residual_quantile,
334        coverage,
335        calibration_scores,
336    })
337}
338
// ===========================================================================
// Regression Depth
// ===========================================================================

/// Type of functional depth measure for regression diagnostics.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DepthType {
    /// Fraiman-Muniz depth.
    FraimanMuniz,
    /// Modified band depth.
    ModifiedBand,
    /// Functional spatial depth.
    FunctionalSpatial,
}
351/// Result of regression depth analysis.
352#[derive(Debug, Clone, PartialEq)]
353pub struct RegressionDepthResult {
354    /// Depth of β̂ in bootstrap distribution.
355    pub beta_depth: f64,
356    /// Depth of each observation's FPC scores (length n).
357    pub score_depths: Vec<f64>,
358    /// Mean of score_depths.
359    pub mean_score_depth: f64,
360    /// Depth method used.
361    pub depth_type: DepthType,
362    /// Number of successful bootstrap refits.
363    pub n_boot_success: usize,
364}
365
366/// Regression depth diagnostics for a linear functional regression.
367///
368/// Computes depth of each observation's FPC scores and depth of the
369/// regression coefficients in a bootstrap distribution.
370///
371/// # Arguments
372/// * `fit` — Fitted [`FregreLmResult`]
373/// * `data` — Functional data (n × m)
374/// * `y` — Response (length n)
375/// * `scalar_covariates` — Optional scalar covariates
376/// * `n_boot` — Number of bootstrap iterations
377/// * `depth_type` — Which depth measure to use
378/// * `seed` — Random seed
379///
380/// # Errors
381///
382/// Returns [`FdarError::InvalidDimension`] if `data` has fewer than 4 rows,
383/// zero columns, or `y.len()` does not match the row count.
384/// Returns [`FdarError::InvalidParameter`] if `n_boot` is zero.
385/// Returns [`FdarError::ComputationFailed`] if score depth computation returns
386/// empty.
387#[must_use = "expensive computation whose result should not be discarded"]
388pub fn regression_depth(
389    fit: &FregreLmResult,
390    data: &FdMatrix,
391    y: &[f64],
392    scalar_covariates: Option<&FdMatrix>,
393    n_boot: usize,
394    depth_type: DepthType,
395    seed: u64,
396) -> Result<RegressionDepthResult, FdarError> {
397    let (n, m) = data.shape();
398    if n < 4 {
399        return Err(FdarError::InvalidDimension {
400            parameter: "data",
401            expected: ">=4 rows".into(),
402            actual: format!("{n}"),
403        });
404    }
405    if m == 0 {
406        return Err(FdarError::InvalidDimension {
407            parameter: "data",
408            expected: ">0 columns".into(),
409            actual: "0".into(),
410        });
411    }
412    if n != y.len() {
413        return Err(FdarError::InvalidDimension {
414            parameter: "y",
415            expected: format!("{n} (matching data rows)"),
416            actual: format!("{}", y.len()),
417        });
418    }
419    if n_boot == 0 {
420        return Err(FdarError::InvalidParameter {
421            parameter: "n_boot",
422            message: "must be > 0".into(),
423        });
424    }
425    let ncomp = fit.ncomp;
426    let scores = project_scores(data, &fit.fpca.mean, &fit.fpca.rotation, ncomp);
427    let score_depths = compute_score_depths(&scores, depth_type);
428    if score_depths.is_empty() {
429        return Err(FdarError::ComputationFailed {
430            operation: "regression_depth",
431            detail: "score depth computation returned empty".into(),
432        });
433    }
434    let mean_score_depth = score_depths.iter().sum::<f64>() / score_depths.len() as f64;
435
436    let orig_coefs: Vec<f64> = (0..ncomp).map(|k| fit.coefficients[1 + k]).collect();
437    let mut rng = StdRng::seed_from_u64(seed);
438    let mut boot_coefs = Vec::new();
439    for _ in 0..n_boot {
440        let idx: Vec<usize> = (0..n).map(|_| rng.gen_range(0..n)).collect();
441        let boot_data = subsample_rows(data, &idx);
442        let boot_y: Vec<f64> = idx.iter().map(|&i| y[i]).collect();
443        let boot_sc = scalar_covariates.map(|sc| subsample_rows(sc, &idx));
444        if let Ok(refit) = fregre_lm(&boot_data, &boot_y, boot_sc.as_ref(), ncomp) {
445            boot_coefs.push((0..ncomp).map(|k| refit.coefficients[1 + k]).collect());
446        }
447    }
448
449    let beta_depth = beta_depth_from_bootstrap(&boot_coefs, &orig_coefs, ncomp, depth_type);
450
451    Ok(RegressionDepthResult {
452        beta_depth,
453        score_depths,
454        mean_score_depth,
455        depth_type,
456        n_boot_success: boot_coefs.len(),
457    })
458}
459
460/// Regression depth diagnostics for a functional logistic regression.
461///
462/// # Errors
463///
464/// Returns [`FdarError::InvalidDimension`] if `data` has fewer than 4 rows,
465/// zero columns, or `y.len()` does not match the row count.
466/// Returns [`FdarError::InvalidParameter`] if `n_boot` is zero.
467/// Returns [`FdarError::ComputationFailed`] if score depth computation returns
468/// empty.
469#[must_use = "expensive computation whose result should not be discarded"]
470pub fn regression_depth_logistic(
471    fit: &FunctionalLogisticResult,
472    data: &FdMatrix,
473    y: &[f64],
474    scalar_covariates: Option<&FdMatrix>,
475    n_boot: usize,
476    depth_type: DepthType,
477    seed: u64,
478) -> Result<RegressionDepthResult, FdarError> {
479    let (n, m) = data.shape();
480    if n < 4 {
481        return Err(FdarError::InvalidDimension {
482            parameter: "data",
483            expected: ">=4 rows".into(),
484            actual: format!("{n}"),
485        });
486    }
487    if m == 0 {
488        return Err(FdarError::InvalidDimension {
489            parameter: "data",
490            expected: ">0 columns".into(),
491            actual: "0".into(),
492        });
493    }
494    if n != y.len() {
495        return Err(FdarError::InvalidDimension {
496            parameter: "y",
497            expected: format!("{n} (matching data rows)"),
498            actual: format!("{}", y.len()),
499        });
500    }
501    if n_boot == 0 {
502        return Err(FdarError::InvalidParameter {
503            parameter: "n_boot",
504            message: "must be > 0".into(),
505        });
506    }
507    let ncomp = fit.ncomp;
508    let scores = project_scores(data, &fit.fpca.mean, &fit.fpca.rotation, ncomp);
509    let score_depths = compute_score_depths(&scores, depth_type);
510    if score_depths.is_empty() {
511        return Err(FdarError::ComputationFailed {
512            operation: "regression_depth_logistic",
513            detail: "score depth computation returned empty".into(),
514        });
515    }
516    let mean_score_depth = score_depths.iter().sum::<f64>() / score_depths.len() as f64;
517
518    let orig_coefs: Vec<f64> = (0..ncomp).map(|k| fit.coefficients[1 + k]).collect();
519    let mut rng = StdRng::seed_from_u64(seed);
520    let boot_coefs =
521        bootstrap_logistic_coefs(data, y, scalar_covariates, n, ncomp, n_boot, &mut rng);
522
523    let beta_depth = beta_depth_from_bootstrap(&boot_coefs, &orig_coefs, ncomp, depth_type);
524
525    Ok(RegressionDepthResult {
526        beta_depth,
527        score_depths,
528        mean_score_depth,
529        depth_type,
530        n_boot_success: boot_coefs.len(),
531    })
532}
533
// ===========================================================================
// Stability / Robustness Analysis
// ===========================================================================

/// Result of bootstrap stability analysis.
///
/// Produced by [`explanation_stability`] and [`explanation_stability_logistic`].
#[derive(Debug, Clone, PartialEq)]
pub struct StabilityAnalysisResult {
    /// Pointwise std of β(t) across bootstraps (length m).
    pub beta_t_std: Vec<f64>,
    /// Std of FPC coefficients γ_k across bootstraps (length ncomp).
    pub coefficient_std: Vec<f64>,
    /// Std of R² or accuracy across bootstraps.
    pub metric_std: f64,
    /// Coefficient of variation of β(t): std / |mean| (length m).
    pub beta_t_cv: Vec<f64>,
    /// Mean Spearman rank correlation of FPC importance rankings.
    pub importance_stability: f64,
    /// Number of successful bootstrap refits.
    pub n_boot_success: usize,
}
555/// Bootstrap stability analysis of a linear functional regression.
556///
557/// Refits the model on `n_boot` bootstrap samples and reports variability
558/// of β(t), FPC coefficients, R², and importance rankings.
559///
560/// # Errors
561///
562/// Returns [`FdarError::InvalidDimension`] if `data` has fewer than 4 rows,
563/// zero columns, or `y.len()` does not match the row count.
564/// Returns [`FdarError::InvalidParameter`] if `n_boot < 2` or `ncomp` is zero.
565/// Returns [`FdarError::ComputationFailed`] if there are insufficient successful
566/// bootstrap refits.
567#[must_use = "expensive computation whose result should not be discarded"]
568pub fn explanation_stability(
569    data: &FdMatrix,
570    y: &[f64],
571    scalar_covariates: Option<&FdMatrix>,
572    ncomp: usize,
573    n_boot: usize,
574    seed: u64,
575) -> Result<StabilityAnalysisResult, FdarError> {
576    let (n, m) = data.shape();
577    if n < 4 {
578        return Err(FdarError::InvalidDimension {
579            parameter: "data",
580            expected: ">=4 rows".into(),
581            actual: format!("{n}"),
582        });
583    }
584    if m == 0 {
585        return Err(FdarError::InvalidDimension {
586            parameter: "data",
587            expected: ">0 columns".into(),
588            actual: "0".into(),
589        });
590    }
591    if n != y.len() {
592        return Err(FdarError::InvalidDimension {
593            parameter: "y",
594            expected: format!("{n} (matching data rows)"),
595            actual: format!("{}", y.len()),
596        });
597    }
598    if n_boot < 2 {
599        return Err(FdarError::InvalidParameter {
600            parameter: "n_boot",
601            message: "must be >= 2".into(),
602        });
603    }
604    if ncomp == 0 {
605        return Err(FdarError::InvalidParameter {
606            parameter: "ncomp",
607            message: "must be > 0".into(),
608        });
609    }
610
611    let mut rng = StdRng::seed_from_u64(seed);
612    let mut all_beta_t: Vec<Vec<f64>> = Vec::new();
613    let mut all_coefs: Vec<Vec<f64>> = Vec::new();
614    let mut all_metrics: Vec<f64> = Vec::new();
615    let mut all_abs_coefs: Vec<Vec<f64>> = Vec::new();
616
617    for _ in 0..n_boot {
618        let idx: Vec<usize> = (0..n).map(|_| rng.gen_range(0..n)).collect();
619        let boot_data = subsample_rows(data, &idx);
620        let boot_y: Vec<f64> = idx.iter().map(|&i| y[i]).collect();
621        let boot_sc = scalar_covariates.map(|sc| subsample_rows(sc, &idx));
622        if let Ok(refit) = fregre_lm(&boot_data, &boot_y, boot_sc.as_ref(), ncomp) {
623            all_beta_t.push(refit.beta_t.clone());
624            let coefs: Vec<f64> = (0..ncomp).map(|k| refit.coefficients[1 + k]).collect();
625            all_abs_coefs.push(coefs.iter().map(|c| c.abs()).collect());
626            all_coefs.push(coefs);
627            all_metrics.push(refit.r_squared);
628        }
629    }
630
631    build_stability_result(
632        &all_beta_t,
633        &all_coefs,
634        &all_abs_coefs,
635        &all_metrics,
636        m,
637        ncomp,
638    )
639    .ok_or_else(|| FdarError::ComputationFailed {
640        operation: "explanation_stability",
641        detail: "insufficient successful bootstrap refits".into(),
642    })
643}
644
645/// Bootstrap stability analysis of a functional logistic regression.
646///
647/// # Errors
648///
649/// Returns [`FdarError::InvalidDimension`] if `data` has fewer than 4 rows,
650/// zero columns, or `y.len()` does not match the row count.
651/// Returns [`FdarError::InvalidParameter`] if `n_boot < 2` or `ncomp` is zero.
652/// Returns [`FdarError::ComputationFailed`] if there are insufficient successful
653/// bootstrap refits.
654#[must_use = "expensive computation whose result should not be discarded"]
655pub fn explanation_stability_logistic(
656    data: &FdMatrix,
657    y: &[f64],
658    scalar_covariates: Option<&FdMatrix>,
659    ncomp: usize,
660    n_boot: usize,
661    seed: u64,
662) -> Result<StabilityAnalysisResult, FdarError> {
663    let (n, m) = data.shape();
664    if n < 4 {
665        return Err(FdarError::InvalidDimension {
666            parameter: "data",
667            expected: ">=4 rows".into(),
668            actual: format!("{n}"),
669        });
670    }
671    if m == 0 {
672        return Err(FdarError::InvalidDimension {
673            parameter: "data",
674            expected: ">0 columns".into(),
675            actual: "0".into(),
676        });
677    }
678    if n != y.len() {
679        return Err(FdarError::InvalidDimension {
680            parameter: "y",
681            expected: format!("{n} (matching data rows)"),
682            actual: format!("{}", y.len()),
683        });
684    }
685    if n_boot < 2 {
686        return Err(FdarError::InvalidParameter {
687            parameter: "n_boot",
688            message: "must be >= 2".into(),
689        });
690    }
691    if ncomp == 0 {
692        return Err(FdarError::InvalidParameter {
693            parameter: "ncomp",
694            message: "must be > 0".into(),
695        });
696    }
697
698    let mut rng = StdRng::seed_from_u64(seed);
699    let (all_beta_t, all_coefs, all_abs_coefs, all_metrics) =
700        bootstrap_logistic_stability(data, y, scalar_covariates, n, ncomp, n_boot, &mut rng);
701
702    build_stability_result(
703        &all_beta_t,
704        &all_coefs,
705        &all_abs_coefs,
706        &all_metrics,
707        m,
708        ncomp,
709    )
710    .ok_or_else(|| FdarError::ComputationFailed {
711        operation: "explanation_stability_logistic",
712        detail: "insufficient successful bootstrap refits".into(),
713    })
714}
715
// ===========================================================================
// Anchors / Rule Extraction
// ===========================================================================

/// A single condition in an anchor rule: a bound on one FPC score.
#[derive(Debug, Clone, PartialEq)]
pub struct AnchorCondition {
    /// FPC component index.
    pub component: usize,
    /// Lower bound on FPC score.
    pub lower_bound: f64,
    /// Upper bound on FPC score.
    pub upper_bound: f64,
}
731/// An anchor rule consisting of FPC score conditions.
732#[derive(Debug, Clone, PartialEq)]
733pub struct AnchorRule {
734    /// Conditions forming the rule (conjunction).
735    pub conditions: Vec<AnchorCondition>,
736    /// Precision: fraction of matching observations with same prediction.
737    pub precision: f64,
738    /// Coverage: fraction of all observations matching the rule.
739    pub coverage: f64,
740    /// Number of observations matching the rule.
741    pub n_matching: usize,
742}
743
744/// Result of anchor explanation for one observation.
745#[derive(Debug, Clone, PartialEq)]
746pub struct AnchorResult {
747    /// The anchor rule.
748    pub rule: AnchorRule,
749    /// Which observation was explained.
750    pub observation: usize,
751    /// Predicted value for the observation.
752    pub predicted_value: f64,
753}
754
755/// Anchor explanation for a linear functional regression.
756///
757/// Uses beam search in FPC score space to find a minimal set of conditions
758/// (score bin memberships) that locally "anchor" the prediction.
759///
760/// # Arguments
761/// * `fit` — Fitted [`FregreLmResult`]
762/// * `data` — Functional data (n × m)
763/// * `scalar_covariates` — Optional scalar covariates
764/// * `observation` — Index of observation to explain
765/// * `precision_threshold` — Minimum precision (e.g. 0.95)
766/// * `n_bins` — Number of quantile bins per FPC dimension
767///
768/// # Errors
769///
770/// Returns [`FdarError::InvalidDimension`] if `data` has zero rows or its column
771/// count does not match `fit.fpca.mean`.
772/// Returns [`FdarError::InvalidParameter`] if `observation >= n` or `n_bins < 2`.
773#[must_use = "expensive computation whose result should not be discarded"]
774pub fn anchor_explanation(
775    fit: &FregreLmResult,
776    data: &FdMatrix,
777    scalar_covariates: Option<&FdMatrix>,
778    observation: usize,
779    precision_threshold: f64,
780    n_bins: usize,
781) -> Result<AnchorResult, FdarError> {
782    let (n, m) = data.shape();
783    if n == 0 {
784        return Err(FdarError::InvalidDimension {
785            parameter: "data",
786            expected: ">0 rows".into(),
787            actual: "0".into(),
788        });
789    }
790    if m != fit.fpca.mean.len() {
791        return Err(FdarError::InvalidDimension {
792            parameter: "data",
793            expected: format!("{} columns", fit.fpca.mean.len()),
794            actual: format!("{m}"),
795        });
796    }
797    if observation >= n {
798        return Err(FdarError::InvalidParameter {
799            parameter: "observation",
800            message: format!("observation {observation} >= n {n}"),
801        });
802    }
803    if n_bins < 2 {
804        return Err(FdarError::InvalidParameter {
805            parameter: "n_bins",
806            message: "must be >= 2".into(),
807        });
808    }
809    let ncomp = fit.ncomp;
810    let scores = project_scores(data, &fit.fpca.mean, &fit.fpca.rotation, ncomp);
811    let obs_pred = fit.fitted_values[observation];
812    let tol = fit.residual_se;
813
814    // "Same prediction" for regression: within ±1 residual_se
815    let same_pred = |i: usize| -> bool {
816        let mut yhat = fit.coefficients[0];
817        for k in 0..ncomp {
818            yhat += fit.coefficients[1 + k] * scores[(i, k)];
819        }
820        if let Some(sc) = scalar_covariates {
821            for j in 0..fit.gamma.len() {
822                yhat += fit.gamma[j] * sc[(i, j)];
823            }
824        }
825        (yhat - obs_pred).abs() <= tol
826    };
827
828    let (rule, _) = anchor_beam_search(
829        &scores,
830        ncomp,
831        n,
832        observation,
833        precision_threshold,
834        n_bins,
835        &same_pred,
836    );
837
838    Ok(AnchorResult {
839        rule,
840        observation,
841        predicted_value: obs_pred,
842    })
843}
844
845/// Anchor explanation for a functional logistic regression.
846///
847/// "Same prediction" = same predicted class.
848///
849/// # Errors
850///
851/// Returns [`FdarError::InvalidDimension`] if `data` has zero rows or its column
852/// count does not match `fit.fpca.mean`.
853/// Returns [`FdarError::InvalidParameter`] if `observation >= n` or `n_bins < 2`.
854#[must_use = "expensive computation whose result should not be discarded"]
855pub fn anchor_explanation_logistic(
856    fit: &FunctionalLogisticResult,
857    data: &FdMatrix,
858    scalar_covariates: Option<&FdMatrix>,
859    observation: usize,
860    precision_threshold: f64,
861    n_bins: usize,
862) -> Result<AnchorResult, FdarError> {
863    let (n, m) = data.shape();
864    if n == 0 {
865        return Err(FdarError::InvalidDimension {
866            parameter: "data",
867            expected: ">0 rows".into(),
868            actual: "0".into(),
869        });
870    }
871    if m != fit.fpca.mean.len() {
872        return Err(FdarError::InvalidDimension {
873            parameter: "data",
874            expected: format!("{} columns", fit.fpca.mean.len()),
875            actual: format!("{m}"),
876        });
877    }
878    if observation >= n {
879        return Err(FdarError::InvalidParameter {
880            parameter: "observation",
881            message: format!("observation {observation} >= n {n}"),
882        });
883    }
884    if n_bins < 2 {
885        return Err(FdarError::InvalidParameter {
886            parameter: "n_bins",
887            message: "must be >= 2".into(),
888        });
889    }
890    let ncomp = fit.ncomp;
891    let scores = project_scores(data, &fit.fpca.mean, &fit.fpca.rotation, ncomp);
892    let obs_class = fit.predicted_classes[observation];
893    let obs_prob = fit.probabilities[observation];
894    let p_scalar = fit.gamma.len();
895
896    // "Same prediction" = same class
897    let same_pred = |i: usize| -> bool {
898        let mut eta = fit.intercept;
899        for k in 0..ncomp {
900            eta += fit.coefficients[1 + k] * scores[(i, k)];
901        }
902        if let Some(sc) = scalar_covariates {
903            for j in 0..p_scalar {
904                eta += fit.gamma[j] * sc[(i, j)];
905            }
906        }
907        let pred_class = if sigmoid(eta) >= 0.5 { 1usize } else { 0usize };
908        pred_class == obs_class
909    };
910
911    let (rule, _) = anchor_beam_search(
912        &scores,
913        ncomp,
914        n,
915        observation,
916        precision_threshold,
917        n_bins,
918        &same_pred,
919    );
920
921    Ok(AnchorResult {
922        rule,
923        observation,
924        predicted_value: obs_prob,
925    })
926}
927
928// ---------------------------------------------------------------------------
929// Private helpers
930// ---------------------------------------------------------------------------
931
/// Hosmer-Lemeshow computation.
///
/// Sorts observations by predicted probability, splits them into `n_groups`
/// near-equal groups (earlier groups absorb the remainder), and returns the
/// chi² statistic, per-group (mean predicted, mean observed) pairs, and
/// per-group counts.
fn hosmer_lemeshow_computation(
    probabilities: &[f64],
    y: &[f64],
    n: usize,
    n_groups: usize,
) -> (f64, Vec<(f64, f64)>, Vec<usize>) {
    // Order observations by ascending predicted probability; NaNs compare equal.
    let mut order: Vec<usize> = (0..n).collect();
    order.sort_by(|&a, &b| {
        probabilities[a]
            .partial_cmp(&probabilities[b])
            .unwrap_or(std::cmp::Ordering::Equal)
    });

    let base = n / n_groups;
    let extra = n % n_groups;

    let mut chi2 = 0.0;
    let mut reliability_bins = Vec::with_capacity(n_groups);
    let mut bin_counts = Vec::with_capacity(n_groups);

    let mut lo = 0;
    for g in 0..n_groups {
        // First `extra` groups take one additional observation.
        let hi = lo + base + usize::from(g < extra);
        let members = &order[lo..hi];
        lo = hi;

        if members.is_empty() {
            continue;
        }
        let count = members.len() as f64;
        let observed: f64 = members.iter().map(|&i| y[i]).sum();
        let expected: f64 = members.iter().map(|&i| probabilities[i]).sum();
        let mean_pred = expected / count;
        let mean_obs = observed / count;

        reliability_bins.push((mean_pred, mean_obs));
        bin_counts.push(members.len());

        // Skip degenerate groups (mean probability of 0 or 1) to avoid 0/0.
        let variance = count * mean_pred * (1.0 - mean_pred);
        if variance > 1e-15 {
            chi2 += (observed - expected).powi(2) / variance;
        }
    }

    (chi2, reliability_bins, bin_counts)
}
979
/// Compute equal-width ECE, MCE, and per-bin contributions.
///
/// Probabilities are assigned to `n_bins` equal-width bins on [0, 1] (a
/// probability of exactly 1.0 falls into the last bin). ECE is the
/// count-weighted sum of per-bin |mean observed - mean predicted| gaps; MCE
/// is the largest unweighted gap.
fn compute_equal_width_ece(
    probabilities: &[f64],
    y: &[f64],
    n: usize,
    n_bins: usize,
) -> (f64, f64, Vec<f64>) {
    let mut sum_obs = vec![0.0; n_bins];
    let mut sum_pred = vec![0.0; n_bins];
    let mut counts = vec![0usize; n_bins];

    for i in 0..n {
        // Clamp into the last bin so p == 1.0 does not index out of range.
        let bin = ((probabilities[i] * n_bins as f64).floor() as usize).min(n_bins - 1);
        sum_obs[bin] += y[i];
        sum_pred[bin] += probabilities[i];
        counts[bin] += 1;
    }

    let mut ece = 0.0;
    let mut mce = 0.0f64;
    let mut bin_ece_contributions = vec![0.0; n_bins];

    for (b, &cnt) in counts.iter().enumerate() {
        if cnt == 0 {
            continue;
        }
        let cnt_f = cnt as f64;
        let gap = (sum_obs[b] / cnt_f - sum_pred[b] / cnt_f).abs();
        let weighted = cnt_f / n as f64 * gap;
        bin_ece_contributions[b] = weighted;
        ece += weighted;
        mce = mce.max(gap);
    }

    (ece, mce, bin_ece_contributions)
}
1017
1018/// Bootstrap logistic stability: collect beta_t, coefs, abs_coefs, and metrics.
1019fn bootstrap_logistic_stability(
1020    data: &FdMatrix,
1021    y: &[f64],
1022    scalar_covariates: Option<&FdMatrix>,
1023    n: usize,
1024    ncomp: usize,
1025    n_boot: usize,
1026    rng: &mut StdRng,
1027) -> (Vec<Vec<f64>>, Vec<Vec<f64>>, Vec<Vec<f64>>, Vec<f64>) {
1028    let mut all_beta_t: Vec<Vec<f64>> = Vec::new();
1029    let mut all_coefs: Vec<Vec<f64>> = Vec::new();
1030    let mut all_abs_coefs: Vec<Vec<f64>> = Vec::new();
1031    let mut all_metrics: Vec<f64> = Vec::new();
1032
1033    for _ in 0..n_boot {
1034        let idx: Vec<usize> = (0..n).map(|_| rng.gen_range(0..n)).collect();
1035        let boot_data = subsample_rows(data, &idx);
1036        let boot_y: Vec<f64> = idx.iter().map(|&i| y[i]).collect();
1037        let boot_sc = scalar_covariates.map(|sc| subsample_rows(sc, &idx));
1038        let has_both = boot_y.iter().any(|&v| v < 0.5) && boot_y.iter().any(|&v| v >= 0.5);
1039        if !has_both {
1040            continue;
1041        }
1042        if let Ok(refit) =
1043            functional_logistic(&boot_data, &boot_y, boot_sc.as_ref(), ncomp, 25, 1e-6)
1044        {
1045            all_beta_t.push(refit.beta_t.clone());
1046            let coefs: Vec<f64> = (0..ncomp).map(|k| refit.coefficients[1 + k]).collect();
1047            all_abs_coefs.push(coefs.iter().map(|c| c.abs()).collect());
1048            all_coefs.push(coefs);
1049            all_metrics.push(refit.accuracy);
1050        }
1051    }
1052
1053    (all_beta_t, all_coefs, all_abs_coefs, all_metrics)
1054}
1055
1056/// Bootstrap logistic coefficients for regression depth.
1057fn bootstrap_logistic_coefs(
1058    data: &FdMatrix,
1059    y: &[f64],
1060    scalar_covariates: Option<&FdMatrix>,
1061    n: usize,
1062    ncomp: usize,
1063    n_boot: usize,
1064    rng: &mut StdRng,
1065) -> Vec<Vec<f64>> {
1066    let mut boot_coefs = Vec::new();
1067    for _ in 0..n_boot {
1068        let idx: Vec<usize> = (0..n).map(|_| rng.gen_range(0..n)).collect();
1069        let boot_data = subsample_rows(data, &idx);
1070        let boot_y: Vec<f64> = idx.iter().map(|&i| y[i]).collect();
1071        let boot_sc = scalar_covariates.map(|sc| subsample_rows(sc, &idx));
1072        let has_both = boot_y.iter().any(|&v| v < 0.5) && boot_y.iter().any(|&v| v >= 0.5);
1073        if !has_both {
1074            continue;
1075        }
1076        if let Ok(refit) =
1077            functional_logistic(&boot_data, &boot_y, boot_sc.as_ref(), ncomp, 25, 1e-6)
1078        {
1079            boot_coefs.push((0..ncomp).map(|k| refit.coefficients[1 + k]).collect());
1080        }
1081    }
1082    boot_coefs
1083}