Skip to main content

fdars_core/conformal/
classification.rs

1//! Split-conformal prediction sets for classification models.
2
3use crate::classification::{
4    classif_predict_probs, fclassif_knn_fit, fclassif_lda_fit, fclassif_qda_fit, ClassifFit,
5};
6use crate::cv::subset_vec;
7use crate::elastic_regression::elastic_logistic;
8use crate::error::FdarError;
9use crate::explain::{project_scores, subsample_rows};
10use crate::explain_generic::FpcPredictor;
11use crate::matrix::FdMatrix;
12use crate::scalar_on_function::functional_logistic;
13
14use super::{
15    argmax, build_classification_result, compute_cal_scores, conformal_split, subset_vec_i8,
16    subset_vec_usize, validate_split_inputs, ClassificationScore, ConformalClassificationResult,
17    ConformalMethod,
18};
19
20/// Split-conformal prediction sets for functional classifiers (LDA / QDA / kNN).
21///
22/// Splits data, refits the specified classifier on the proper-training subset,
23/// computes non-conformity scores on calibration, then builds prediction sets
24/// for test data.
25///
26/// # Arguments
27/// * `classifier` — One of `"lda"`, `"qda"`, or `"knn"`
28/// * `k_nn` — Number of neighbors (only used if `classifier == "knn"`)
29/// * `score_type` — [`ClassificationScore::Lac`] or [`ClassificationScore::Aps`]
30///
31/// # Errors
32///
33/// Returns [`FdarError::InvalidDimension`] if `data` has fewer than 4 observations,
34/// `test_data` is empty, `y` length differs from the number of rows in `data`,
35/// or `data` and `test_data` have different numbers of columns.
36/// Returns [`FdarError::InvalidParameter`] if `cal_fraction` or `alpha` is not in (0, 1),
37/// or `classifier` is not one of `"lda"`, `"qda"`, or `"knn"`.
38/// Returns [`FdarError::ComputationFailed`] if the classifier fitting fails.
39#[must_use = "expensive computation whose result should not be discarded"]
40pub fn conformal_classif(
41    data: &FdMatrix,
42    y: &[usize],
43    test_data: &FdMatrix,
44    covariates_train: Option<&FdMatrix>,
45    _covariates_test: Option<&FdMatrix>,
46    ncomp: usize,
47    classifier: &str,
48    k_nn: usize,
49    score_type: ClassificationScore,
50    cal_fraction: f64,
51    alpha: f64,
52    seed: u64,
53) -> Result<ConformalClassificationResult, FdarError> {
54    let n = data.nrows();
55    validate_split_inputs(n, test_data.nrows(), cal_fraction, alpha)?;
56    if y.len() != n {
57        return Err(FdarError::InvalidDimension {
58            parameter: "y",
59            expected: format!("{n}"),
60            actual: format!("{}", y.len()),
61        });
62    }
63    if data.ncols() != test_data.ncols() {
64        return Err(FdarError::InvalidDimension {
65            parameter: "test_data",
66            expected: format!("{} columns", data.ncols()),
67            actual: format!("{} columns", test_data.ncols()),
68        });
69    }
70
71    let (proper_idx, cal_idx) = conformal_split(n, cal_fraction, seed);
72
73    let proper_data = subsample_rows(data, &proper_idx);
74    let proper_y = subset_vec_usize(y, &proper_idx);
75    let proper_cov = covariates_train.map(|c| subsample_rows(c, &proper_idx));
76
77    // Fit classifier on proper-training
78    let fit: ClassifFit = match classifier {
79        "lda" => fclassif_lda_fit(&proper_data, &proper_y, proper_cov.as_ref(), ncomp)?,
80        "qda" => fclassif_qda_fit(&proper_data, &proper_y, proper_cov.as_ref(), ncomp)?,
81        "knn" => fclassif_knn_fit(&proper_data, &proper_y, proper_cov.as_ref(), ncomp, k_nn)?,
82        _ => {
83            return Err(FdarError::InvalidParameter {
84                parameter: "classifier",
85                message: format!(
86                    "unknown classifier '{classifier}', expected 'lda', 'qda', or 'knn'"
87                ),
88            })
89        }
90    };
91
92    // Get calibration probabilities
93    let cal_data = subsample_rows(data, &cal_idx);
94    let cal_scores_mat = fit.project(&cal_data);
95    let cal_probs = classif_predict_probs(&fit, &cal_scores_mat);
96    let cal_true = subset_vec_usize(y, &cal_idx);
97    let cal_scores = compute_cal_scores(&cal_probs, &cal_true, score_type);
98
99    // Get test probabilities
100    let test_scores_mat = fit.project(test_data);
101    let test_probs = classif_predict_probs(&fit, &test_scores_mat);
102    let test_pred_classes: Vec<usize> = test_probs.iter().map(|p| argmax(p)).collect();
103
104    Ok(build_classification_result(
105        cal_scores,
106        &test_probs,
107        test_pred_classes,
108        alpha,
109        ConformalMethod::Split,
110        score_type,
111    ))
112}
113
114/// Split-conformal prediction sets for functional logistic regression.
115///
116/// Refits [`functional_logistic`] on the proper-training subset.
117/// Binary classification -> prediction sets of size 1 or 2.
118///
119/// # Errors
120///
121/// Returns [`FdarError::InvalidDimension`] if `data` has fewer than 4 observations,
122/// `test_data` is empty, `y` length differs from the number of rows in `data`,
123/// or `data` and `test_data` have different numbers of columns.
124/// Returns [`FdarError::InvalidParameter`] if `cal_fraction` or `alpha` is not in (0, 1),
125/// or the proper training set is too small for the requested `ncomp`.
126/// Returns [`FdarError::ComputationFailed`] if the `functional_logistic` fitting fails.
127#[must_use = "expensive computation whose result should not be discarded"]
128pub fn conformal_logistic(
129    data: &FdMatrix,
130    y: &[f64],
131    test_data: &FdMatrix,
132    scalar_train: Option<&FdMatrix>,
133    scalar_test: Option<&FdMatrix>,
134    ncomp: usize,
135    max_iter: usize,
136    tol: f64,
137    score_type: ClassificationScore,
138    cal_fraction: f64,
139    alpha: f64,
140    seed: u64,
141) -> Result<ConformalClassificationResult, FdarError> {
142    let n = data.nrows();
143    validate_split_inputs(n, test_data.nrows(), cal_fraction, alpha)?;
144    if y.len() != n {
145        return Err(FdarError::InvalidDimension {
146            parameter: "y",
147            expected: format!("{n}"),
148            actual: format!("{}", y.len()),
149        });
150    }
151    if data.ncols() != test_data.ncols() {
152        return Err(FdarError::InvalidDimension {
153            parameter: "test_data",
154            expected: format!("{} columns", data.ncols()),
155            actual: format!("{} columns", test_data.ncols()),
156        });
157    }
158
159    let (proper_idx, cal_idx) = conformal_split(n, cal_fraction, seed);
160    if proper_idx.len() < ncomp + 2 {
161        return Err(FdarError::InvalidParameter {
162            parameter: "ncomp",
163            message: format!(
164                "proper training set size {} too small for ncomp={}",
165                proper_idx.len(),
166                ncomp
167            ),
168        });
169    }
170
171    let proper_data = subsample_rows(data, &proper_idx);
172    let proper_y = subset_vec(y, &proper_idx);
173    let proper_sc = scalar_train.map(|sc| subsample_rows(sc, &proper_idx));
174
175    let refit = functional_logistic(
176        &proper_data,
177        &proper_y,
178        proper_sc.as_ref(),
179        ncomp,
180        max_iter,
181        tol,
182    )?;
183
184    // Calibration: get probabilities
185    let cal_data = subsample_rows(data, &cal_idx);
186    let cal_sc = scalar_train.map(|sc| subsample_rows(sc, &cal_idx));
187    let cal_scores_mat = project_scores(
188        &cal_data,
189        &refit.fpca.mean,
190        &refit.fpca.rotation,
191        refit.ncomp,
192        &refit.fpca.weights,
193    );
194    let cal_probs = logistic_probs_from_scores(&refit, &cal_scores_mat, cal_sc.as_ref());
195    let cal_true: Vec<usize> = cal_idx.iter().map(|&i| y[i] as usize).collect();
196    let cal_scores = compute_cal_scores(&cal_probs, &cal_true, score_type);
197
198    // Test: get probabilities
199    let test_scores_mat = project_scores(
200        test_data,
201        &refit.fpca.mean,
202        &refit.fpca.rotation,
203        refit.ncomp,
204        &refit.fpca.weights,
205    );
206    let test_probs = logistic_probs_from_scores(&refit, &test_scores_mat, scalar_test);
207    let test_pred_classes: Vec<usize> = test_probs.iter().map(|p| argmax(p)).collect();
208
209    Ok(build_classification_result(
210        cal_scores,
211        &test_probs,
212        test_pred_classes,
213        alpha,
214        ConformalMethod::Split,
215        score_type,
216    ))
217}
218
219/// Split-conformal prediction sets for elastic logistic regression.
220///
221/// Refits [`elastic_logistic`] on the proper-training subset.
222///
223/// # Errors
224///
225/// Returns [`FdarError::InvalidDimension`] if `data` has fewer than 4 observations,
226/// `test_data` is empty, `y` length differs from the number of rows in `data`,
227/// or `data` and `test_data` have different numbers of columns.
228/// Returns [`FdarError::InvalidParameter`] if `cal_fraction` or `alpha` is not in (0, 1).
229/// Returns [`FdarError::ComputationFailed`] if the `elastic_logistic` fitting fails.
230#[must_use = "expensive computation whose result should not be discarded"]
231pub fn conformal_elastic_logistic(
232    data: &FdMatrix,
233    y: &[i8],
234    test_data: &FdMatrix,
235    argvals: &[f64],
236    lambda: f64,
237    score_type: ClassificationScore,
238    cal_fraction: f64,
239    alpha: f64,
240    seed: u64,
241) -> Result<ConformalClassificationResult, FdarError> {
242    let n = data.nrows();
243    validate_split_inputs(n, test_data.nrows(), cal_fraction, alpha)?;
244    if y.len() != n {
245        return Err(FdarError::InvalidDimension {
246            parameter: "y",
247            expected: format!("{n}"),
248            actual: format!("{}", y.len()),
249        });
250    }
251    if data.ncols() != test_data.ncols() {
252        return Err(FdarError::InvalidDimension {
253            parameter: "test_data",
254            expected: format!("{} columns", data.ncols()),
255            actual: format!("{} columns", test_data.ncols()),
256        });
257    }
258
259    let (proper_idx, cal_idx) = conformal_split(n, cal_fraction, seed);
260
261    let proper_data = subsample_rows(data, &proper_idx);
262    let proper_y = subset_vec_i8(y, &proper_idx);
263
264    let refit = elastic_logistic(&proper_data, &proper_y, argvals, 20, lambda, 50, 1e-4)?;
265
266    // Calibration probabilities
267    let cal_data = subsample_rows(data, &cal_idx);
268    let cal_probs = predict_elastic_logistic_probs(&refit, &cal_data, argvals);
269    let cal_true: Vec<usize> = cal_idx.iter().map(|&i| usize::from(y[i] == 1)).collect();
270    let cal_scores = compute_cal_scores(&cal_probs, &cal_true, score_type);
271
272    // Test probabilities
273    let test_probs = predict_elastic_logistic_probs(&refit, test_data, argvals);
274    let test_pred_classes: Vec<usize> = test_probs.iter().map(|p| argmax(p)).collect();
275
276    Ok(build_classification_result(
277        cal_scores,
278        &test_probs,
279        test_pred_classes,
280        alpha,
281        ConformalMethod::Split,
282        score_type,
283    ))
284}
285
286// ═══════════════════════════════════════════════════════════════════════════
287// Classification prediction helpers
288// ═══════════════════════════════════════════════════════════════════════════
289
290/// Helper: get binary class probabilities from functional logistic result.
291fn logistic_probs_from_scores(
292    fit: &crate::scalar_on_function::FunctionalLogisticResult,
293    scores: &FdMatrix,
294    scalar_covariates: Option<&FdMatrix>,
295) -> Vec<Vec<f64>> {
296    let n = scores.nrows();
297    let ncomp = fit.ncomp;
298    (0..n)
299        .map(|i| {
300            let s: Vec<f64> = (0..ncomp).map(|k| scores[(i, k)]).collect();
301            let sc_row: Option<Vec<f64>> =
302                scalar_covariates.map(|sc| (0..sc.ncols()).map(|j| sc[(i, j)]).collect());
303            let mut eta = fit.coefficients[0];
304            for k in 0..ncomp {
305                eta += fit.coefficients[1 + k] * s[k];
306            }
307            if let Some(ref sc) = sc_row {
308                for (j, &v) in sc.iter().enumerate() {
309                    if j < fit.gamma.len() {
310                        eta += fit.gamma[j] * v;
311                    }
312                }
313            }
314            let p1 = crate::scalar_on_function::sigmoid(eta);
315            vec![1.0 - p1, p1]
316        })
317        .collect()
318}
319
320/// Helper: predict binary probabilities from elastic logistic result.
321fn predict_elastic_logistic_probs(
322    result: &crate::elastic_regression::ElasticLogisticResult,
323    new_data: &FdMatrix,
324    argvals: &[f64],
325) -> Vec<Vec<f64>> {
326    let (n_new, m) = new_data.shape();
327    let weights = crate::helpers::simpsons_weights(argvals);
328    let q_new = crate::alignment::srsf_transform(new_data, argvals);
329
330    (0..n_new)
331        .map(|i| {
332            let qi: Vec<f64> = (0..m).map(|j| q_new[(i, j)]).collect();
333            let gam = crate::alignment::dp_alignment_core(&result.beta, &qi, argvals, 0.0);
334            let q_warped = crate::alignment::reparameterize_curve(&qi, argvals, &gam);
335            let h = (argvals[m - 1] - argvals[0]) / (m - 1) as f64;
336            let gam_deriv = crate::helpers::gradient_uniform(&gam, h);
337
338            let mut eta = result.alpha;
339            for j in 0..m {
340                let q_aligned = q_warped[j] * gam_deriv[j].max(0.0).sqrt();
341                eta += q_aligned * result.beta[j] * weights[j];
342            }
343            let p1 = 1.0 / (1.0 + (-eta).exp());
344            vec![1.0 - p1, p1]
345        })
346        .collect()
347}