use crate::error::FdarError;
use crate::matrix::FdMatrix;
pub mod classification;
pub mod cv;
pub mod elastic;
pub mod generic;
pub mod regression;
#[cfg(test)]
mod tests;
/// Identifies the conformal procedure recorded in a result's `method` field.
///
/// The enum is `#[non_exhaustive]`, so `match`es outside this crate need a
/// wildcard arm. `PartialEq`/`Eq` are derived so that callers can compare the
/// recorded method directly (e.g. in assertions).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum ConformalMethod {
    /// Split conformal prediction.
    Split,
    /// Cross-conformal prediction.
    CrossConformal {
        /// Number of folds used.
        n_folds: usize,
    },
    /// Jackknife+ prediction.
    JackknifePlus,
}
/// Selects the non-conformity score used for conformal classification.
///
/// The enum is `#[non_exhaustive]`, so `match`es outside this crate need a
/// wildcard arm. `PartialEq`/`Eq` are derived so that callers can compare the
/// score type recorded in a result directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum ClassificationScore {
    /// Score `1 - p[true_class]`, as computed by `lac_score`.
    Lac,
    /// Cumulative probability of the classes ranked by decreasing probability,
    /// down to and including the true class, as computed by `aps_score`.
    Aps,
}
/// Result of a conformal regression run: point predictions, interval bounds,
/// and the calibration quantities behind them.
///
/// The per-field notes describe how `build_regression_result` in this module
/// fills the struct; other constructors elsewhere in the crate may populate it
/// differently — TODO confirm.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub struct ConformalRegressionResult {
/// Point predictions for the test observations.
pub predictions: Vec<f64>,
/// Lower interval bounds (`prediction - residual_quantile`), one per prediction.
pub lower: Vec<f64>,
/// Upper interval bounds (`prediction + residual_quantile`), one per prediction.
pub upper: Vec<f64>,
/// Conformal quantile of the calibration residuals, used as the interval half-width.
pub residual_quantile: f64,
/// Fraction of calibration scores that are `<= residual_quantile`.
pub coverage: f64,
/// Calibration residuals, in the order left by `conformal_quantile`'s in-place sort.
pub calibration_scores: Vec<f64>,
/// Conformal procedure that produced this result.
pub method: ConformalMethod,
}
/// Result of a conformal classification run: predicted classes, prediction
/// sets, and the calibration quantities behind them.
///
/// The per-field notes describe how `build_classification_result` in this
/// module fills the struct; other constructors elsewhere in the crate may
/// populate it differently — TODO confirm.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub struct ConformalClassificationResult {
/// Predicted class for each test observation, as supplied by the caller.
pub predicted_classes: Vec<usize>,
/// Prediction set (class indices) for each test observation.
pub prediction_sets: Vec<Vec<usize>>,
/// Length of each entry of `prediction_sets`.
pub set_sizes: Vec<usize>,
/// Mean of `set_sizes`; `0.0` when there are no test observations.
pub average_set_size: f64,
/// Fraction of calibration scores that are `<= score_quantile`.
pub coverage: f64,
/// Calibration scores, in the order left by `conformal_quantile`'s in-place sort.
pub calibration_scores: Vec<f64>,
/// Conformal quantile of the calibration scores used to build the prediction sets.
pub score_quantile: f64,
/// Conformal procedure that produced this result.
pub method: ConformalMethod,
/// Non-conformity score used to build the prediction sets.
pub score_type: ClassificationScore,
}
/// Tuning parameters shared by the conformal procedures.
///
/// `validate_split_inputs` in this module requires both `cal_fraction` and
/// `alpha` to lie strictly between 0 and 1.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub struct ConformalConfig {
    /// Fraction of the observations set aside for calibration.
    pub cal_fraction: f64,
    /// Level passed to the conformal quantile, which uses `1 - alpha`.
    pub alpha: f64,
    /// Seed for the random number generator behind the data split.
    pub seed: u64,
}

impl Default for ConformalConfig {
    /// Returns the default configuration: `cal_fraction = 0.25`,
    /// `alpha = 0.1`, `seed = 42`.
    fn default() -> Self {
        let (cal_fraction, alpha, seed) = (0.25, 0.1, 42);
        Self {
            cal_fraction,
            alpha,
            seed,
        }
    }
}
/// Randomly partitions the indices `0..n` into a proper-training set and a
/// calibration set.
///
/// The indices are shuffled with a `StdRng` seeded from `seed`. The
/// calibration size is `round(n * cal_fraction)`, raised to at least 2 and
/// then capped at `n - 2` (the cap wins when `n < 4`). The calibration indices
/// are taken from the tail of the shuffled order.
///
/// For `n < 2` the cap saturates at zero instead of underflowing, so every
/// index is returned as proper-training and the calibration set is empty.
///
/// Returns `(proper_idx, cal_idx)`.
pub(super) fn conformal_split(n: usize, cal_fraction: f64, seed: u64) -> (Vec<usize>, Vec<usize>) {
    use rand::prelude::*;
    let mut rng = StdRng::seed_from_u64(seed);
    let mut proper_idx: Vec<usize> = (0..n).collect();
    proper_idx.shuffle(&mut rng);
    // `saturating_sub` keeps the cap at 0 for n < 2; a plain `n - 2` would
    // underflow `usize` there.
    let n_cal = ((n as f64 * cal_fraction).round() as usize)
        .max(2)
        .min(n.saturating_sub(2));
    // `n_cal <= n` is guaranteed by the cap above, so this subtraction is safe.
    let cal_idx = proper_idx.split_off(n - n_cal);
    (proper_idx, cal_idx)
}
/// Computes the conformal quantile of `scores` at level `1 - alpha`.
///
/// `scores` is sorted in place with `crate::helpers::sort_nan_safe` (the exact
/// ordering is defined by that helper — presumably ascending; confirm there).
/// The result is the element at 1-based rank `ceil((n + 1) * (1 - alpha))` of
/// the sorted slice.
///
/// Returns `0.0` for an empty slice and `f64::INFINITY` when the rank exceeds
/// the number of scores. A rank of zero is treated as rank one.
pub(super) fn conformal_quantile(scores: &mut [f64], alpha: f64) -> f64 {
    if scores.is_empty() {
        return 0.0;
    }
    crate::helpers::sort_nan_safe(scores);
    let count = scores.len();
    let rank = ((count + 1) as f64 * (1.0 - alpha)).ceil() as usize;
    // `rank - 1` is in bounds exactly when `rank <= count`; a larger rank
    // falls off the end of the slice and maps to +infinity.
    scores
        .get(rank.saturating_sub(1))
        .copied()
        .unwrap_or(f64::INFINITY)
}
/// Returns the fraction of `scores` that are less than or equal to `quantile`.
///
/// An empty slice yields `0.0`. NaN scores never satisfy the comparison and
/// therefore count as not covered.
pub(super) fn empirical_coverage(scores: &[f64], quantile: f64) -> f64 {
    if scores.is_empty() {
        return 0.0;
    }
    let mut covered = 0usize;
    for &score in scores {
        if score <= quantile {
            covered += 1;
        }
    }
    covered as f64 / scores.len() as f64
}
#[allow(unused_imports)]
pub(super) use crate::helpers::quantile_sorted;
/// Assembles a [`ConformalRegressionResult`] from calibration residuals and
/// test-set point predictions.
///
/// The conformal quantile of `cal_residuals` at level `1 - alpha` is used as a
/// half-width: every prediction `p` gets the interval `[p - q, p + q]`.
/// `coverage` is the fraction of calibration residuals that are `<= q`.
/// `cal_residuals` is sorted in place by `conformal_quantile` and stored in
/// that order; `method` is recorded unchanged.
pub(super) fn build_regression_result(
    mut cal_residuals: Vec<f64>,
    test_predictions: Vec<f64>,
    alpha: f64,
    method: ConformalMethod,
) -> ConformalRegressionResult {
    let half_width = conformal_quantile(&mut cal_residuals, alpha);
    let coverage = empirical_coverage(&cal_residuals, half_width);
    // Build both bounds in a single pass over the predictions.
    let (lower, upper): (Vec<f64>, Vec<f64>) = test_predictions
        .iter()
        .map(|&center| (center - half_width, center + half_width))
        .unzip();
    ConformalRegressionResult {
        predictions: test_predictions,
        lower,
        upper,
        residual_quantile: half_width,
        coverage,
        calibration_scores: cal_residuals,
        method,
    }
}
/// Non-conformity score `1 - probs[true_class]`.
///
/// Returns `1.0` when `true_class` is not a valid index into `probs`.
pub(super) fn lac_score(probs: &[f64], true_class: usize) -> f64 {
    probs.get(true_class).map_or(1.0, |&p| 1.0 - p)
}
/// Non-conformity score given by the cumulative probability of the classes
/// ranked by decreasing probability, down to and including `true_class`.
///
/// The ranking uses a stable sort, so tied classes keep their index order;
/// incomparable pairs (NaN) are treated as equal. Returns `1.0` when
/// `true_class` is not a valid index into `probs`.
pub(super) fn aps_score(probs: &[f64], true_class: usize) -> f64 {
    let mut ranking: Vec<usize> = (0..probs.len()).collect();
    ranking.sort_by(|&lhs, &rhs| {
        probs[rhs]
            .partial_cmp(&probs[lhs])
            .unwrap_or(std::cmp::Ordering::Equal)
    });
    match ranking.iter().position(|&class| class == true_class) {
        // Accumulate in ranking order, starting from 0.0.
        Some(rank) => ranking[..=rank]
            .iter()
            .fold(0.0, |mass, &class| mass + probs[class]),
        None => 1.0,
    }
}
/// Returns, in increasing index order, every class `k` whose score
/// `1 - probs[k]` is less than or equal to `quantile`.
pub(super) fn lac_prediction_set(probs: &[f64], quantile: f64) -> Vec<usize> {
    probs
        .iter()
        .enumerate()
        .filter(|&(_, &p)| 1.0 - p <= quantile)
        .map(|(class, _)| class)
        .collect()
}
/// Builds a prediction set by adding classes in order of decreasing
/// probability until the accumulated probability reaches `quantile`.
///
/// The class that makes the running total reach `quantile` is included. If
/// the total never reaches it, every class is returned. The ranking uses a
/// stable sort (ties keep index order; NaN comparisons count as equal). Empty
/// `probs` yields an empty set; otherwise the set contains at least the top
/// class.
pub(super) fn aps_prediction_set(probs: &[f64], quantile: f64) -> Vec<usize> {
    let mut ranking: Vec<usize> = (0..probs.len()).collect();
    ranking.sort_by(|&lhs, &rhs| {
        probs[rhs]
            .partial_cmp(&probs[lhs])
            .unwrap_or(std::cmp::Ordering::Equal)
    });
    let mut mass = 0.0;
    // Position of the first class at which the running total reaches the quantile.
    let cutoff = ranking.iter().position(|&class| {
        mass += probs[class];
        mass >= quantile
    });
    if let Some(last) = cutoff {
        ranking.truncate(last + 1);
    }
    ranking
}
pub(super) fn build_classification_result(
mut cal_scores: Vec<f64>,
test_probs: &[Vec<f64>],
test_pred_classes: Vec<usize>,
alpha: f64,
method: ConformalMethod,
score_type: ClassificationScore,
) -> ConformalClassificationResult {
let score_quantile = conformal_quantile(&mut cal_scores, alpha);
let coverage = empirical_coverage(&cal_scores, score_quantile);
let prediction_sets: Vec<Vec<usize>> = test_probs
.iter()
.map(|probs| match score_type {
ClassificationScore::Lac => lac_prediction_set(probs, score_quantile),
ClassificationScore::Aps => aps_prediction_set(probs, score_quantile),
})
.collect();
let set_sizes: Vec<usize> = prediction_sets.iter().map(std::vec::Vec::len).collect();
let average_set_size = if set_sizes.is_empty() {
0.0
} else {
set_sizes.iter().sum::<usize>() as f64 / set_sizes.len() as f64
};
ConformalClassificationResult {
predicted_classes: test_pred_classes,
prediction_sets,
set_sizes,
average_set_size,
coverage,
calibration_scores: cal_scores,
score_quantile,
method,
score_type,
}
}
pub(super) fn compute_cal_scores(
probs: &[Vec<f64>],
true_classes: &[usize],
score_type: ClassificationScore,
) -> Vec<f64> {
probs
.iter()
.zip(true_classes.iter())
.map(|(p, &y)| match score_type {
ClassificationScore::Lac => lac_score(p, y),
ClassificationScore::Aps => aps_score(p, y),
})
.collect()
}
/// Stacks `a` on top of `b`, returning a matrix with `a.nrows() + b.nrows()`
/// rows and `a.ncols()` columns.
///
/// The column counts are only compared with `debug_assert_eq!`, so a mismatch
/// is not checked here in release builds.
pub(super) fn vstack(a: &FdMatrix, b: &FdMatrix) -> FdMatrix {
    let n_cols = a.ncols();
    debug_assert_eq!(n_cols, b.ncols());
    let top_rows = a.nrows();
    let total_rows = top_rows + b.nrows();
    let mut stacked = FdMatrix::zeros(total_rows, n_cols);
    for col in 0..n_cols {
        for row in 0..total_rows {
            // Rows below `top_rows` come from `a`; the rest come from `b`.
            stacked[(row, col)] = if row < top_rows {
                a[(row, col)]
            } else {
                b[(row - top_rows, col)]
            };
        }
    }
    stacked
}
/// Stacks two optional matrices with [`vstack`].
///
/// Returns `None` unless both `a` and `b` are `Some`.
pub(super) fn vstack_opt(a: Option<&FdMatrix>, b: Option<&FdMatrix>) -> Option<FdMatrix> {
    a.zip(b).map(|(top, bottom)| vstack(top, bottom))
}
/// Gathers `v[i]` for every `i` in `indices`, preserving the order of `indices`.
///
/// # Panics
///
/// Panics if any index is out of bounds for `v`.
pub(super) fn subset_vec_usize(v: &[usize], indices: &[usize]) -> Vec<usize> {
    let mut picked = Vec::with_capacity(indices.len());
    for &pos in indices {
        picked.push(v[pos]);
    }
    picked
}
/// Gathers `v[i]` for every `i` in `indices`, preserving the order of `indices`.
///
/// # Panics
///
/// Panics if any index is out of bounds for `v`.
pub(super) fn subset_vec_i8(v: &[i8], indices: &[usize]) -> Vec<i8> {
    let mut picked = Vec::with_capacity(indices.len());
    for &pos in indices {
        picked.push(v[pos]);
    }
    picked
}
/// Returns the index of the largest value in `probs`, or `0` for an empty slice.
///
/// When several entries tie for the maximum, the last one wins. A comparison
/// involving NaN is treated as a tie, so the later entry is taken.
pub(super) fn argmax(probs: &[f64]) -> usize {
    let mut winner = 0;
    for (idx, candidate) in probs.iter().enumerate().skip(1) {
        // Keep the current winner only when it is strictly greater; ties and
        // incomparable pairs hand the lead to the later entry.
        let current_is_greater = matches!(
            probs[winner].partial_cmp(candidate),
            Some(std::cmp::Ordering::Greater)
        );
        if !current_is_greater {
            winner = idx;
        }
    }
    winner
}
/// Validates the inputs shared by the split-based conformal procedures.
///
/// * `n` — number of observations in the data (at least 4 required).
/// * `n_test` — number of test observations (at least 1 required).
/// * `cal_fraction` — must lie strictly inside `(0, 1)`.
/// * `alpha` — must lie strictly inside `(0, 1)`.
///
/// # Errors
///
/// Returns [`FdarError::InvalidDimension`] when `n < 4` or `n_test == 0`, and
/// [`FdarError::InvalidParameter`] when `cal_fraction` or `alpha` is NaN or
/// falls outside the open interval `(0, 1)`. Checks run in the order listed
/// and the first failure is returned.
pub(super) fn validate_split_inputs(
    n: usize,
    n_test: usize,
    cal_fraction: f64,
    alpha: f64,
) -> Result<(), FdarError> {
    if n < 4 {
        return Err(FdarError::InvalidDimension {
            parameter: "data",
            expected: "at least 4 observations".to_string(),
            actual: n.to_string(),
        });
    }
    if n_test == 0 {
        return Err(FdarError::InvalidDimension {
            parameter: "test_data",
            expected: "at least 1 observation".to_string(),
            actual: "0".to_string(),
        });
    }
    // NaN fails every ordered comparison, so the range tests alone would let
    // it through; reject it explicitly.
    if cal_fraction.is_nan() || cal_fraction <= 0.0 || cal_fraction >= 1.0 {
        return Err(FdarError::InvalidParameter {
            parameter: "cal_fraction",
            message: format!("must be in (0, 1), got {cal_fraction}"),
        });
    }
    if alpha.is_nan() || alpha <= 0.0 || alpha >= 1.0 {
        return Err(FdarError::InvalidParameter {
            parameter: "alpha",
            message: format!("must be in (0, 1), got {alpha}"),
        });
    }
    Ok(())
}
pub use classification::{conformal_classif, conformal_elastic_logistic, conformal_logistic};
pub use cv::{cv_conformal_classification, cv_conformal_regression, jackknife_plus_regression};
pub use elastic::{
conformal_elastic_pcr, conformal_elastic_pcr_with_config, conformal_elastic_regression,
conformal_elastic_regression_with_config,
};
pub use generic::{conformal_generic_classification, conformal_generic_regression};
pub use regression::{conformal_fregre_lm, conformal_fregre_np};