fdars-core 0.10.0

Functional Data Analysis algorithms in Rust
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
//! Conformal prediction intervals and prediction sets.
//!
//! Provides distribution-free uncertainty quantification for all regression and
//! classification frameworks in fdars:
//!
//! **Regression** (prediction intervals):
//! - [`conformal_fregre_lm`] — Linear functional regression
//! - [`conformal_fregre_np`] — Nonparametric kernel regression
//! - [`conformal_elastic_regression`] — Elastic alignment regression
//! - [`conformal_elastic_pcr`] — Elastic principal component regression
//! - [`conformal_generic_regression`] — Any [`FpcPredictor`](crate::explain_generic::FpcPredictor) model
//! - [`cv_conformal_regression`] — Cross-conformal (CV+) with closure
//! - [`jackknife_plus_regression`] — Jackknife+ with closure
//!
//! **Classification** (prediction sets):
//! - [`conformal_classif`] — LDA / QDA / kNN classifiers
//! - [`conformal_logistic`] — Functional logistic regression
//! - [`conformal_elastic_logistic`] — Elastic logistic regression
//! - [`conformal_generic_classification`] — Any [`FpcPredictor`](crate::explain_generic::FpcPredictor) model
//! - [`cv_conformal_classification`] — Cross-conformal (CV+) with closure

use crate::error::FdarError;
use crate::matrix::FdMatrix;

pub mod classification;
pub mod cv;
pub mod elastic;
pub mod generic;
pub mod regression;

#[cfg(test)]
mod tests;

// ═══════════════════════════════════════════════════════════════════════════
// Types
// ═══════════════════════════════════════════════════════════════════════════

/// Split-conformal method variant.
#[derive(Debug, Clone, Copy)]
#[non_exhaustive]
pub enum ConformalMethod {
    /// Random split into proper-training and calibration.
    Split,
    /// K-fold cross-conformal (CV+).
    CrossConformal { n_folds: usize },
    /// Leave-one-out jackknife+.
    JackknifePlus,
}

/// Non-conformity score type for classification.
#[derive(Debug, Clone, Copy)]
#[non_exhaustive]
pub enum ClassificationScore {
    /// Least Ambiguous set-valued Classifier: `s = 1 - P(true class)`.
    Lac,
    /// Adaptive Prediction Sets: cumulative sorted probabilities.
    Aps,
}

/// Conformal prediction intervals for regression.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub struct ConformalRegressionResult {
    /// Point predictions on test data.
    pub predictions: Vec<f64>,
    /// Lower bounds of prediction intervals.
    pub lower: Vec<f64>,
    /// Upper bounds of prediction intervals.
    pub upper: Vec<f64>,
    /// Quantile of calibration residuals.
    pub residual_quantile: f64,
    /// Empirical coverage on calibration set.
    pub coverage: f64,
    /// Absolute residuals on calibration set.
    pub calibration_scores: Vec<f64>,
    /// Method used.
    pub method: ConformalMethod,
}

/// Conformal prediction sets for classification.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub struct ConformalClassificationResult {
    /// Argmax predictions for each test observation.
    pub predicted_classes: Vec<usize>,
    /// Set of plausible classes per test observation.
    pub prediction_sets: Vec<Vec<usize>>,
    /// Size of each prediction set.
    pub set_sizes: Vec<usize>,
    /// Mean prediction set size.
    pub average_set_size: f64,
    /// Empirical coverage on calibration set.
    pub coverage: f64,
    /// Non-conformity scores on calibration set.
    pub calibration_scores: Vec<f64>,
    /// Quantile of calibration scores.
    pub score_quantile: f64,
    /// Method used.
    pub method: ConformalMethod,
    /// Score type used.
    pub score_type: ClassificationScore,
}

/// Configuration for split-conformal prediction.
///
/// Collects the common tuning parameters shared by all conformal prediction
/// functions, with sensible defaults obtained via [`ConformalConfig::default()`].
///
/// # Example
/// ```no_run
/// use fdars_core::conformal::ConformalConfig;
///
/// let config = ConformalConfig {
///     alpha: 0.05,   // 95% coverage
///     ..ConformalConfig::default()
/// };
/// ```
#[derive(Debug, Clone)]
pub struct ConformalConfig {
    /// Fraction of data reserved for calibration (default: 0.25).
    pub cal_fraction: f64,
    /// Miscoverage level, e.g. 0.1 for 90% intervals (default: 0.1).
    pub alpha: f64,
    /// Random seed for the calibration/training split (default: 42).
    pub seed: u64,
}

impl Default for ConformalConfig {
    fn default() -> Self {
        Self {
            cal_fraction: 0.25,
            alpha: 0.1,
            seed: 42,
        }
    }
}

// ═══════════════════════════════════════════════════════════════════════════
// Core helpers
// ═══════════════════════════════════════════════════════════════════════════

/// Split indices into proper-training and calibration sets.
pub(super) fn conformal_split(n: usize, cal_fraction: f64, seed: u64) -> (Vec<usize>, Vec<usize>) {
    use rand::prelude::*;
    let mut rng = StdRng::seed_from_u64(seed);
    let mut all_idx: Vec<usize> = (0..n).collect();
    all_idx.shuffle(&mut rng);
    let n_cal = ((n as f64 * cal_fraction).round() as usize)
        .max(2)
        .min(n - 2);
    let n_proper = n - n_cal;
    let proper_idx = all_idx[..n_proper].to_vec();
    let cal_idx = all_idx[n_proper..].to_vec();
    (proper_idx, cal_idx)
}

/// Compute conformal quantile: the k-th smallest score where k = ceil((n+1)*(1-alpha)).
///
/// Uses exact order statistic (no interpolation) to preserve the finite-sample
/// coverage guarantee. Returns `f64::INFINITY` when k > n (conservative: infinite
/// interval gives 100% coverage).
pub(super) fn conformal_quantile(scores: &mut [f64], alpha: f64) -> f64 {
    let n = scores.len();
    if n == 0 {
        return 0.0;
    }
    crate::helpers::sort_nan_safe(scores);
    let k = ((n + 1) as f64 * (1.0 - alpha)).ceil() as usize;
    if k > n {
        return f64::INFINITY;
    }
    scores[k.saturating_sub(1)]
}

/// Empirical coverage: fraction of scores <= quantile.
pub(super) fn empirical_coverage(scores: &[f64], quantile: f64) -> f64 {
    let n = scores.len();
    if n == 0 {
        return 0.0;
    }
    scores.iter().filter(|&&s| s <= quantile).count() as f64 / n as f64
}

/// Quantile of a pre-sorted slice using linear interpolation (for non-conformal uses).
#[allow(dead_code)]
pub(super) fn quantile_sorted(sorted: &[f64], q: f64) -> f64 {
    let n = sorted.len();
    if n == 0 {
        return 0.0;
    }
    if n == 1 {
        return sorted[0];
    }
    let idx = q * (n - 1) as f64;
    let lo = (idx.floor() as usize).min(n - 1);
    let hi = (idx.ceil() as usize).min(n - 1);
    if lo == hi {
        sorted[lo]
    } else {
        let frac = idx - lo as f64;
        sorted[lo] * (1.0 - frac) + sorted[hi] * frac
    }
}

/// Build regression result from calibration residuals and test predictions.
pub(super) fn build_regression_result(
    mut cal_residuals: Vec<f64>,
    test_predictions: Vec<f64>,
    alpha: f64,
    method: ConformalMethod,
) -> ConformalRegressionResult {
    let residual_quantile = conformal_quantile(&mut cal_residuals, alpha);
    let coverage = empirical_coverage(&cal_residuals, residual_quantile);
    let lower = test_predictions
        .iter()
        .map(|&p| p - residual_quantile)
        .collect();
    let upper = test_predictions
        .iter()
        .map(|&p| p + residual_quantile)
        .collect();
    ConformalRegressionResult {
        predictions: test_predictions,
        lower,
        upper,
        residual_quantile,
        coverage,
        calibration_scores: cal_residuals,
        method,
    }
}

/// Compute LAC non-conformity score: 1 - P(true class).
pub(super) fn lac_score(probs: &[f64], true_class: usize) -> f64 {
    if true_class < probs.len() {
        1.0 - probs[true_class]
    } else {
        1.0
    }
}

/// Compute APS non-conformity score: cumulative probability until true class is included.
pub(super) fn aps_score(probs: &[f64], true_class: usize) -> f64 {
    let g = probs.len();
    let mut order: Vec<usize> = (0..g).collect();
    order.sort_by(|&a, &b| {
        probs[b]
            .partial_cmp(&probs[a])
            .unwrap_or(std::cmp::Ordering::Equal)
    });
    let mut cum = 0.0;
    for &c in &order {
        cum += probs[c];
        if c == true_class {
            return cum;
        }
    }
    1.0
}

/// Build LAC prediction set: include class k if 1 - P(k) <= quantile.
pub(super) fn lac_prediction_set(probs: &[f64], quantile: f64) -> Vec<usize> {
    (0..probs.len())
        .filter(|&k| 1.0 - probs[k] <= quantile)
        .collect()
}

/// Build APS prediction set: include classes in descending probability order until cumulative >= quantile.
///
/// The APS non-conformity score is the cumulative probability until the true class
/// is included. A class k is in the prediction set if its APS score <= the calibration
/// quantile, which means we include classes until cumulative probability reaches the quantile.
pub(super) fn aps_prediction_set(probs: &[f64], quantile: f64) -> Vec<usize> {
    let g = probs.len();
    let mut order: Vec<usize> = (0..g).collect();
    order.sort_by(|&a, &b| {
        probs[b]
            .partial_cmp(&probs[a])
            .unwrap_or(std::cmp::Ordering::Equal)
    });
    let mut cum = 0.0;
    let mut set = Vec::new();
    for &c in &order {
        set.push(c);
        cum += probs[c];
        if cum >= quantile {
            break;
        }
    }
    if set.is_empty() && g > 0 {
        set.push(order[0]);
    }
    set
}

/// Build classification result from calibration scores and test probabilities.
pub(super) fn build_classification_result(
    mut cal_scores: Vec<f64>,
    test_probs: &[Vec<f64>],
    test_pred_classes: Vec<usize>,
    alpha: f64,
    method: ConformalMethod,
    score_type: ClassificationScore,
) -> ConformalClassificationResult {
    let score_quantile = conformal_quantile(&mut cal_scores, alpha);
    let coverage = empirical_coverage(&cal_scores, score_quantile);

    let prediction_sets: Vec<Vec<usize>> = test_probs
        .iter()
        .map(|probs| match score_type {
            ClassificationScore::Lac => lac_prediction_set(probs, score_quantile),
            ClassificationScore::Aps => aps_prediction_set(probs, score_quantile),
        })
        .collect();

    let set_sizes: Vec<usize> = prediction_sets.iter().map(std::vec::Vec::len).collect();
    let average_set_size = if set_sizes.is_empty() {
        0.0
    } else {
        set_sizes.iter().sum::<usize>() as f64 / set_sizes.len() as f64
    };

    ConformalClassificationResult {
        predicted_classes: test_pred_classes,
        prediction_sets,
        set_sizes,
        average_set_size,
        coverage,
        calibration_scores: cal_scores,
        score_quantile,
        method,
        score_type,
    }
}

/// Compute non-conformity scores for classification calibration.
pub(super) fn compute_cal_scores(
    probs: &[Vec<f64>],
    true_classes: &[usize],
    score_type: ClassificationScore,
) -> Vec<f64> {
    probs
        .iter()
        .zip(true_classes.iter())
        .map(|(p, &y)| match score_type {
            ClassificationScore::Lac => lac_score(p, y),
            ClassificationScore::Aps => aps_score(p, y),
        })
        .collect()
}

/// Vertically stack two matrices with the same number of columns.
pub(super) fn vstack(a: &FdMatrix, b: &FdMatrix) -> FdMatrix {
    let m = a.ncols();
    debug_assert_eq!(m, b.ncols());
    let na = a.nrows();
    let nb = b.nrows();
    let mut out = FdMatrix::zeros(na + nb, m);
    for j in 0..m {
        for i in 0..na {
            out[(i, j)] = a[(i, j)];
        }
        for i in 0..nb {
            out[(na + i, j)] = b[(i, j)];
        }
    }
    out
}

/// Vertically stack two optional matrices.
pub(super) fn vstack_opt(a: Option<&FdMatrix>, b: Option<&FdMatrix>) -> Option<FdMatrix> {
    match (a, b) {
        (Some(a), Some(b)) => Some(vstack(a, b)),
        _ => None,
    }
}

/// Subset a usize vector by indices.
pub(super) fn subset_vec_usize(v: &[usize], indices: &[usize]) -> Vec<usize> {
    indices.iter().map(|&i| v[i]).collect()
}

/// Subset an i8 vector by indices.
pub(super) fn subset_vec_i8(v: &[i8], indices: &[usize]) -> Vec<i8> {
    indices.iter().map(|&i| v[i]).collect()
}

/// Argmax of a probability vector.
pub(super) fn argmax(probs: &[f64]) -> usize {
    probs
        .iter()
        .enumerate()
        .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
        .map_or(0, |(i, _)| i)
}

/// Validate common inputs for split conformal.
pub(super) fn validate_split_inputs(
    n: usize,
    n_test: usize,
    cal_fraction: f64,
    alpha: f64,
) -> Result<(), FdarError> {
    if n < 4 {
        return Err(FdarError::InvalidDimension {
            parameter: "data",
            expected: "at least 4 observations".to_string(),
            actual: format!("{n}"),
        });
    }
    if n_test == 0 {
        return Err(FdarError::InvalidDimension {
            parameter: "test_data",
            expected: "at least 1 observation".to_string(),
            actual: "0".to_string(),
        });
    }
    if cal_fraction <= 0.0 || cal_fraction >= 1.0 {
        return Err(FdarError::InvalidParameter {
            parameter: "cal_fraction",
            message: format!("must be in (0, 1), got {cal_fraction}"),
        });
    }
    if alpha <= 0.0 || alpha >= 1.0 {
        return Err(FdarError::InvalidParameter {
            parameter: "alpha",
            message: format!("must be in (0, 1), got {alpha}"),
        });
    }
    Ok(())
}

// ═══════════════════════════════════════════════════════════════════════════
// Re-exports — preserves the external API
// ═══════════════════════════════════════════════════════════════════════════

pub use classification::{conformal_classif, conformal_elastic_logistic, conformal_logistic};
pub use cv::{cv_conformal_classification, cv_conformal_regression, jackknife_plus_regression};
pub use elastic::{
    conformal_elastic_pcr, conformal_elastic_pcr_with_config, conformal_elastic_regression,
    conformal_elastic_regression_with_config,
};
pub use generic::{conformal_generic_classification, conformal_generic_regression};
pub use regression::{conformal_fregre_lm, conformal_fregre_np};