//! Split-conformal prediction sets for classification models.

use crate::classification::{
    classif_predict_probs, fclassif_knn_fit, fclassif_lda_fit, fclassif_qda_fit, ClassifFit,
};
use crate::cv::subset_vec;
use crate::elastic_regression::elastic_logistic;
use crate::error::FdarError;
use crate::explain::{project_scores, subsample_rows};
use crate::explain_generic::FpcPredictor;
use crate::matrix::FdMatrix;
use crate::scalar_on_function::functional_logistic;

use super::{
    argmax, build_classification_result, compute_cal_scores, conformal_split, subset_vec_i8,
    subset_vec_usize, validate_split_inputs, ClassificationScore, ConformalClassificationResult,
    ConformalMethod,
};

20/// Split-conformal prediction sets for functional classifiers (LDA / QDA / kNN).
21///
22/// Splits data, refits the specified classifier on the proper-training subset,
23/// computes non-conformity scores on calibration, then builds prediction sets
24/// for test data.
25///
26/// # Arguments
27/// * `classifier` — One of `"lda"`, `"qda"`, or `"knn"`
28/// * `k_nn` — Number of neighbors (only used if `classifier == "knn"`)
29/// * `score_type` — [`ClassificationScore::Lac`] or [`ClassificationScore::Aps`]
30///
31/// # Errors
32///
33/// Returns [`FdarError::InvalidDimension`] if `data` has fewer than 4 observations,
34/// `test_data` is empty, `y` length differs from the number of rows in `data`,
35/// or `data` and `test_data` have different numbers of columns.
36/// Returns [`FdarError::InvalidParameter`] if `cal_fraction` or `alpha` is not in (0, 1),
37/// or `classifier` is not one of `"lda"`, `"qda"`, or `"knn"`.
38/// Returns [`FdarError::ComputationFailed`] if the classifier fitting fails.
39#[must_use = "expensive computation whose result should not be discarded"]
40pub fn conformal_classif(
41    data: &FdMatrix,
42    y: &[usize],
43    test_data: &FdMatrix,
44    covariates_train: Option<&FdMatrix>,
45    _covariates_test: Option<&FdMatrix>,
46    ncomp: usize,
47    classifier: &str,
48    k_nn: usize,
49    score_type: ClassificationScore,
50    cal_fraction: f64,
51    alpha: f64,
52    seed: u64,
53) -> Result<ConformalClassificationResult, FdarError> {
54    let n = data.nrows();
55    validate_split_inputs(n, test_data.nrows(), cal_fraction, alpha)?;
56    if y.len() != n {
57        return Err(FdarError::InvalidDimension {
58            parameter: "y",
59            expected: format!("{n}"),
60            actual: format!("{}", y.len()),
61        });
62    }
63    if data.ncols() != test_data.ncols() {
64        return Err(FdarError::InvalidDimension {
65            parameter: "test_data",
66            expected: format!("{} columns", data.ncols()),
67            actual: format!("{} columns", test_data.ncols()),
68        });
69    }
70
71    let (proper_idx, cal_idx) = conformal_split(n, cal_fraction, seed);
72
73    let proper_data = subsample_rows(data, &proper_idx);
74    let proper_y = subset_vec_usize(y, &proper_idx);
75    let proper_cov = covariates_train.map(|c| subsample_rows(c, &proper_idx));
76
77    // Fit classifier on proper-training
78    let fit: ClassifFit = match classifier {
79        "lda" => fclassif_lda_fit(&proper_data, &proper_y, proper_cov.as_ref(), ncomp)?,
80        "qda" => fclassif_qda_fit(&proper_data, &proper_y, proper_cov.as_ref(), ncomp)?,
81        "knn" => fclassif_knn_fit(&proper_data, &proper_y, proper_cov.as_ref(), ncomp, k_nn)?,
82        _ => {
83            return Err(FdarError::InvalidParameter {
84                parameter: "classifier",
85                message: format!(
86                    "unknown classifier '{classifier}', expected 'lda', 'qda', or 'knn'"
87                ),
88            })
89        }
90    };
91
92    // Get calibration probabilities
93    let cal_data = subsample_rows(data, &cal_idx);
94    let cal_scores_mat = fit.project(&cal_data);
95    let cal_probs = classif_predict_probs(&fit, &cal_scores_mat);
96    let cal_true = subset_vec_usize(y, &cal_idx);
97    let cal_scores = compute_cal_scores(&cal_probs, &cal_true, score_type);
98
99    // Get test probabilities
100    let test_scores_mat = fit.project(test_data);
101    let test_probs = classif_predict_probs(&fit, &test_scores_mat);
102    let test_pred_classes: Vec<usize> = test_probs.iter().map(|p| argmax(p)).collect();
103
104    Ok(build_classification_result(
105        cal_scores,
106        &test_probs,
107        test_pred_classes,
108        alpha,
109        ConformalMethod::Split,
110        score_type,
111    ))
112}
114/// Split-conformal prediction sets for functional logistic regression.
115///
116/// Refits [`functional_logistic`] on the proper-training subset.
117/// Binary classification -> prediction sets of size 1 or 2.
118///
119/// # Errors
120///
121/// Returns [`FdarError::InvalidDimension`] if `data` has fewer than 4 observations,
122/// `test_data` is empty, `y` length differs from the number of rows in `data`,
123/// or `data` and `test_data` have different numbers of columns.
124/// Returns [`FdarError::InvalidParameter`] if `cal_fraction` or `alpha` is not in (0, 1),
125/// or the proper training set is too small for the requested `ncomp`.
126/// Returns [`FdarError::ComputationFailed`] if the `functional_logistic` fitting fails.
127#[must_use = "expensive computation whose result should not be discarded"]
128pub fn conformal_logistic(
129    data: &FdMatrix,
130    y: &[f64],
131    test_data: &FdMatrix,
132    scalar_train: Option<&FdMatrix>,
133    scalar_test: Option<&FdMatrix>,
134    ncomp: usize,
135    max_iter: usize,
136    tol: f64,
137    score_type: ClassificationScore,
138    cal_fraction: f64,
139    alpha: f64,
140    seed: u64,
141) -> Result<ConformalClassificationResult, FdarError> {
142    let n = data.nrows();
143    validate_split_inputs(n, test_data.nrows(), cal_fraction, alpha)?;
144    if y.len() != n {
145        return Err(FdarError::InvalidDimension {
146            parameter: "y",
147            expected: format!("{n}"),
148            actual: format!("{}", y.len()),
149        });
150    }
151    if data.ncols() != test_data.ncols() {
152        return Err(FdarError::InvalidDimension {
153            parameter: "test_data",
154            expected: format!("{} columns", data.ncols()),
155            actual: format!("{} columns", test_data.ncols()),
156        });
157    }
158
159    let (proper_idx, cal_idx) = conformal_split(n, cal_fraction, seed);
160    if proper_idx.len() < ncomp + 2 {
161        return Err(FdarError::InvalidParameter {
162            parameter: "ncomp",
163            message: format!(
164                "proper training set size {} too small for ncomp={}",
165                proper_idx.len(),
166                ncomp
167            ),
168        });
169    }
170
171    let proper_data = subsample_rows(data, &proper_idx);
172    let proper_y = subset_vec(y, &proper_idx);
173    let proper_sc = scalar_train.map(|sc| subsample_rows(sc, &proper_idx));
174
175    let refit = functional_logistic(
176        &proper_data,
177        &proper_y,
178        proper_sc.as_ref(),
179        ncomp,
180        max_iter,
181        tol,
182    )?;
183
184    // Calibration: get probabilities
185    let cal_data = subsample_rows(data, &cal_idx);
186    let cal_sc = scalar_train.map(|sc| subsample_rows(sc, &cal_idx));
187    let cal_scores_mat = project_scores(
188        &cal_data,
189        &refit.fpca.mean,
190        &refit.fpca.rotation,
191        refit.ncomp,
192    );
193    let cal_probs = logistic_probs_from_scores(&refit, &cal_scores_mat, cal_sc.as_ref());
194    let cal_true: Vec<usize> = cal_idx.iter().map(|&i| y[i] as usize).collect();
195    let cal_scores = compute_cal_scores(&cal_probs, &cal_true, score_type);
196
197    // Test: get probabilities
198    let test_scores_mat = project_scores(
199        test_data,
200        &refit.fpca.mean,
201        &refit.fpca.rotation,
202        refit.ncomp,
203    );
204    let test_probs = logistic_probs_from_scores(&refit, &test_scores_mat, scalar_test);
205    let test_pred_classes: Vec<usize> = test_probs.iter().map(|p| argmax(p)).collect();
206
207    Ok(build_classification_result(
208        cal_scores,
209        &test_probs,
210        test_pred_classes,
211        alpha,
212        ConformalMethod::Split,
213        score_type,
214    ))
215}
217/// Split-conformal prediction sets for elastic logistic regression.
218///
219/// Refits [`elastic_logistic`] on the proper-training subset.
220///
221/// # Errors
222///
223/// Returns [`FdarError::InvalidDimension`] if `data` has fewer than 4 observations,
224/// `test_data` is empty, `y` length differs from the number of rows in `data`,
225/// or `data` and `test_data` have different numbers of columns.
226/// Returns [`FdarError::InvalidParameter`] if `cal_fraction` or `alpha` is not in (0, 1).
227/// Returns [`FdarError::ComputationFailed`] if the `elastic_logistic` fitting fails.
228#[must_use = "expensive computation whose result should not be discarded"]
229pub fn conformal_elastic_logistic(
230    data: &FdMatrix,
231    y: &[i8],
232    test_data: &FdMatrix,
233    argvals: &[f64],
234    lambda: f64,
235    score_type: ClassificationScore,
236    cal_fraction: f64,
237    alpha: f64,
238    seed: u64,
239) -> Result<ConformalClassificationResult, FdarError> {
240    let n = data.nrows();
241    validate_split_inputs(n, test_data.nrows(), cal_fraction, alpha)?;
242    if y.len() != n {
243        return Err(FdarError::InvalidDimension {
244            parameter: "y",
245            expected: format!("{n}"),
246            actual: format!("{}", y.len()),
247        });
248    }
249    if data.ncols() != test_data.ncols() {
250        return Err(FdarError::InvalidDimension {
251            parameter: "test_data",
252            expected: format!("{} columns", data.ncols()),
253            actual: format!("{} columns", test_data.ncols()),
254        });
255    }
256
257    let (proper_idx, cal_idx) = conformal_split(n, cal_fraction, seed);
258
259    let proper_data = subsample_rows(data, &proper_idx);
260    let proper_y = subset_vec_i8(y, &proper_idx);
261
262    let refit = elastic_logistic(&proper_data, &proper_y, argvals, 20, lambda, 50, 1e-4)?;
263
264    // Calibration probabilities
265    let cal_data = subsample_rows(data, &cal_idx);
266    let cal_probs = predict_elastic_logistic_probs(&refit, &cal_data, argvals);
267    let cal_true: Vec<usize> = cal_idx.iter().map(|&i| usize::from(y[i] == 1)).collect();
268    let cal_scores = compute_cal_scores(&cal_probs, &cal_true, score_type);
269
270    // Test probabilities
271    let test_probs = predict_elastic_logistic_probs(&refit, test_data, argvals);
272    let test_pred_classes: Vec<usize> = test_probs.iter().map(|p| argmax(p)).collect();
273
274    Ok(build_classification_result(
275        cal_scores,
276        &test_probs,
277        test_pred_classes,
278        alpha,
279        ConformalMethod::Split,
280        score_type,
281    ))
282}

// ═══════════════════════════════════════════════════════════════════════════
// Classification prediction helpers
// ═══════════════════════════════════════════════════════════════════════════

288/// Helper: get binary class probabilities from functional logistic result.
289fn logistic_probs_from_scores(
290    fit: &crate::scalar_on_function::FunctionalLogisticResult,
291    scores: &FdMatrix,
292    scalar_covariates: Option<&FdMatrix>,
293) -> Vec<Vec<f64>> {
294    let n = scores.nrows();
295    let ncomp = fit.ncomp;
296    (0..n)
297        .map(|i| {
298            let s: Vec<f64> = (0..ncomp).map(|k| scores[(i, k)]).collect();
299            let sc_row: Option<Vec<f64>> =
300                scalar_covariates.map(|sc| (0..sc.ncols()).map(|j| sc[(i, j)]).collect());
301            let mut eta = fit.coefficients[0];
302            for k in 0..ncomp {
303                eta += fit.coefficients[1 + k] * s[k];
304            }
305            if let Some(ref sc) = sc_row {
306                for (j, &v) in sc.iter().enumerate() {
307                    if j < fit.gamma.len() {
308                        eta += fit.gamma[j] * v;
309                    }
310                }
311            }
312            let p1 = crate::scalar_on_function::sigmoid(eta);
313            vec![1.0 - p1, p1]
314        })
315        .collect()
316}
318/// Helper: predict binary probabilities from elastic logistic result.
319fn predict_elastic_logistic_probs(
320    result: &crate::elastic_regression::ElasticLogisticResult,
321    new_data: &FdMatrix,
322    argvals: &[f64],
323) -> Vec<Vec<f64>> {
324    let (n_new, m) = new_data.shape();
325    let weights = crate::helpers::simpsons_weights(argvals);
326    let q_new = crate::alignment::srsf_transform(new_data, argvals);
327
328    (0..n_new)
329        .map(|i| {
330            let qi: Vec<f64> = (0..m).map(|j| q_new[(i, j)]).collect();
331            let gam = crate::alignment::dp_alignment_core(&result.beta, &qi, argvals, 0.0);
332            let q_warped = crate::alignment::reparameterize_curve(&qi, argvals, &gam);
333            let h = (argvals[m - 1] - argvals[0]) / (m - 1) as f64;
334            let gam_deriv = crate::helpers::gradient_uniform(&gam, h);
335
336            let mut eta = result.alpha;
337            for j in 0..m {
338                let q_aligned = q_warped[j] * gam_deriv[j].max(0.0).sqrt();
339                eta += q_aligned * result.beta[j] * weights[j];
340            }
341            let p1 = 1.0 / (1.0 + (-eta).exp());
342            vec![1.0 - p1, p1]
343        })
344        .collect()
345}