1use crate::classification::{
4 classif_predict_probs, fclassif_knn_fit, fclassif_lda_fit, fclassif_qda_fit, ClassifFit,
5};
6use crate::cv::subset_vec;
7use crate::elastic_regression::elastic_logistic;
8use crate::error::FdarError;
9use crate::explain::{project_scores, subsample_rows};
10use crate::explain_generic::FpcPredictor;
11use crate::matrix::FdMatrix;
12use crate::scalar_on_function::functional_logistic;
13
14use super::{
15 argmax, build_classification_result, compute_cal_scores, conformal_split, subset_vec_i8,
16 subset_vec_usize, validate_split_inputs, ClassificationScore, ConformalClassificationResult,
17 ConformalMethod,
18};
19
20#[must_use = "expensive computation whose result should not be discarded"]
40pub fn conformal_classif(
41 data: &FdMatrix,
42 y: &[usize],
43 test_data: &FdMatrix,
44 covariates_train: Option<&FdMatrix>,
45 _covariates_test: Option<&FdMatrix>,
46 ncomp: usize,
47 classifier: &str,
48 k_nn: usize,
49 score_type: ClassificationScore,
50 cal_fraction: f64,
51 alpha: f64,
52 seed: u64,
53) -> Result<ConformalClassificationResult, FdarError> {
54 let n = data.nrows();
55 validate_split_inputs(n, test_data.nrows(), cal_fraction, alpha)?;
56 if y.len() != n {
57 return Err(FdarError::InvalidDimension {
58 parameter: "y",
59 expected: format!("{n}"),
60 actual: format!("{}", y.len()),
61 });
62 }
63 if data.ncols() != test_data.ncols() {
64 return Err(FdarError::InvalidDimension {
65 parameter: "test_data",
66 expected: format!("{} columns", data.ncols()),
67 actual: format!("{} columns", test_data.ncols()),
68 });
69 }
70
71 let (proper_idx, cal_idx) = conformal_split(n, cal_fraction, seed);
72
73 let proper_data = subsample_rows(data, &proper_idx);
74 let proper_y = subset_vec_usize(y, &proper_idx);
75 let proper_cov = covariates_train.map(|c| subsample_rows(c, &proper_idx));
76
77 let fit: ClassifFit = match classifier {
79 "lda" => fclassif_lda_fit(&proper_data, &proper_y, proper_cov.as_ref(), ncomp)?,
80 "qda" => fclassif_qda_fit(&proper_data, &proper_y, proper_cov.as_ref(), ncomp)?,
81 "knn" => fclassif_knn_fit(&proper_data, &proper_y, proper_cov.as_ref(), ncomp, k_nn)?,
82 _ => {
83 return Err(FdarError::InvalidParameter {
84 parameter: "classifier",
85 message: format!(
86 "unknown classifier '{classifier}', expected 'lda', 'qda', or 'knn'"
87 ),
88 })
89 }
90 };
91
92 let cal_data = subsample_rows(data, &cal_idx);
94 let cal_scores_mat = fit.project(&cal_data);
95 let cal_probs = classif_predict_probs(&fit, &cal_scores_mat);
96 let cal_true = subset_vec_usize(y, &cal_idx);
97 let cal_scores = compute_cal_scores(&cal_probs, &cal_true, score_type);
98
99 let test_scores_mat = fit.project(test_data);
101 let test_probs = classif_predict_probs(&fit, &test_scores_mat);
102 let test_pred_classes: Vec<usize> = test_probs.iter().map(|p| argmax(p)).collect();
103
104 Ok(build_classification_result(
105 cal_scores,
106 &test_probs,
107 test_pred_classes,
108 alpha,
109 ConformalMethod::Split,
110 score_type,
111 ))
112}
113
114#[must_use = "expensive computation whose result should not be discarded"]
128pub fn conformal_logistic(
129 data: &FdMatrix,
130 y: &[f64],
131 test_data: &FdMatrix,
132 scalar_train: Option<&FdMatrix>,
133 scalar_test: Option<&FdMatrix>,
134 ncomp: usize,
135 max_iter: usize,
136 tol: f64,
137 score_type: ClassificationScore,
138 cal_fraction: f64,
139 alpha: f64,
140 seed: u64,
141) -> Result<ConformalClassificationResult, FdarError> {
142 let n = data.nrows();
143 validate_split_inputs(n, test_data.nrows(), cal_fraction, alpha)?;
144 if y.len() != n {
145 return Err(FdarError::InvalidDimension {
146 parameter: "y",
147 expected: format!("{n}"),
148 actual: format!("{}", y.len()),
149 });
150 }
151 if data.ncols() != test_data.ncols() {
152 return Err(FdarError::InvalidDimension {
153 parameter: "test_data",
154 expected: format!("{} columns", data.ncols()),
155 actual: format!("{} columns", test_data.ncols()),
156 });
157 }
158
159 let (proper_idx, cal_idx) = conformal_split(n, cal_fraction, seed);
160 if proper_idx.len() < ncomp + 2 {
161 return Err(FdarError::InvalidParameter {
162 parameter: "ncomp",
163 message: format!(
164 "proper training set size {} too small for ncomp={}",
165 proper_idx.len(),
166 ncomp
167 ),
168 });
169 }
170
171 let proper_data = subsample_rows(data, &proper_idx);
172 let proper_y = subset_vec(y, &proper_idx);
173 let proper_sc = scalar_train.map(|sc| subsample_rows(sc, &proper_idx));
174
175 let refit = functional_logistic(
176 &proper_data,
177 &proper_y,
178 proper_sc.as_ref(),
179 ncomp,
180 max_iter,
181 tol,
182 )?;
183
184 let cal_data = subsample_rows(data, &cal_idx);
186 let cal_sc = scalar_train.map(|sc| subsample_rows(sc, &cal_idx));
187 let cal_scores_mat = project_scores(
188 &cal_data,
189 &refit.fpca.mean,
190 &refit.fpca.rotation,
191 refit.ncomp,
192 );
193 let cal_probs = logistic_probs_from_scores(&refit, &cal_scores_mat, cal_sc.as_ref());
194 let cal_true: Vec<usize> = cal_idx.iter().map(|&i| y[i] as usize).collect();
195 let cal_scores = compute_cal_scores(&cal_probs, &cal_true, score_type);
196
197 let test_scores_mat = project_scores(
199 test_data,
200 &refit.fpca.mean,
201 &refit.fpca.rotation,
202 refit.ncomp,
203 );
204 let test_probs = logistic_probs_from_scores(&refit, &test_scores_mat, scalar_test);
205 let test_pred_classes: Vec<usize> = test_probs.iter().map(|p| argmax(p)).collect();
206
207 Ok(build_classification_result(
208 cal_scores,
209 &test_probs,
210 test_pred_classes,
211 alpha,
212 ConformalMethod::Split,
213 score_type,
214 ))
215}
216
217#[must_use = "expensive computation whose result should not be discarded"]
229pub fn conformal_elastic_logistic(
230 data: &FdMatrix,
231 y: &[i8],
232 test_data: &FdMatrix,
233 argvals: &[f64],
234 lambda: f64,
235 score_type: ClassificationScore,
236 cal_fraction: f64,
237 alpha: f64,
238 seed: u64,
239) -> Result<ConformalClassificationResult, FdarError> {
240 let n = data.nrows();
241 validate_split_inputs(n, test_data.nrows(), cal_fraction, alpha)?;
242 if y.len() != n {
243 return Err(FdarError::InvalidDimension {
244 parameter: "y",
245 expected: format!("{n}"),
246 actual: format!("{}", y.len()),
247 });
248 }
249 if data.ncols() != test_data.ncols() {
250 return Err(FdarError::InvalidDimension {
251 parameter: "test_data",
252 expected: format!("{} columns", data.ncols()),
253 actual: format!("{} columns", test_data.ncols()),
254 });
255 }
256
257 let (proper_idx, cal_idx) = conformal_split(n, cal_fraction, seed);
258
259 let proper_data = subsample_rows(data, &proper_idx);
260 let proper_y = subset_vec_i8(y, &proper_idx);
261
262 let refit = elastic_logistic(&proper_data, &proper_y, argvals, 20, lambda, 50, 1e-4)?;
263
264 let cal_data = subsample_rows(data, &cal_idx);
266 let cal_probs = predict_elastic_logistic_probs(&refit, &cal_data, argvals);
267 let cal_true: Vec<usize> = cal_idx.iter().map(|&i| usize::from(y[i] == 1)).collect();
268 let cal_scores = compute_cal_scores(&cal_probs, &cal_true, score_type);
269
270 let test_probs = predict_elastic_logistic_probs(&refit, test_data, argvals);
272 let test_pred_classes: Vec<usize> = test_probs.iter().map(|p| argmax(p)).collect();
273
274 Ok(build_classification_result(
275 cal_scores,
276 &test_probs,
277 test_pred_classes,
278 alpha,
279 ConformalMethod::Split,
280 score_type,
281 ))
282}
283
284fn logistic_probs_from_scores(
290 fit: &crate::scalar_on_function::FunctionalLogisticResult,
291 scores: &FdMatrix,
292 scalar_covariates: Option<&FdMatrix>,
293) -> Vec<Vec<f64>> {
294 let n = scores.nrows();
295 let ncomp = fit.ncomp;
296 (0..n)
297 .map(|i| {
298 let s: Vec<f64> = (0..ncomp).map(|k| scores[(i, k)]).collect();
299 let sc_row: Option<Vec<f64>> =
300 scalar_covariates.map(|sc| (0..sc.ncols()).map(|j| sc[(i, j)]).collect());
301 let mut eta = fit.coefficients[0];
302 for k in 0..ncomp {
303 eta += fit.coefficients[1 + k] * s[k];
304 }
305 if let Some(ref sc) = sc_row {
306 for (j, &v) in sc.iter().enumerate() {
307 if j < fit.gamma.len() {
308 eta += fit.gamma[j] * v;
309 }
310 }
311 }
312 let p1 = crate::scalar_on_function::sigmoid(eta);
313 vec![1.0 - p1, p1]
314 })
315 .collect()
316}
317
318fn predict_elastic_logistic_probs(
320 result: &crate::elastic_regression::ElasticLogisticResult,
321 new_data: &FdMatrix,
322 argvals: &[f64],
323) -> Vec<Vec<f64>> {
324 let (n_new, m) = new_data.shape();
325 let weights = crate::helpers::simpsons_weights(argvals);
326 let q_new = crate::alignment::srsf_transform(new_data, argvals);
327
328 (0..n_new)
329 .map(|i| {
330 let qi: Vec<f64> = (0..m).map(|j| q_new[(i, j)]).collect();
331 let gam = crate::alignment::dp_alignment_core(&result.beta, &qi, argvals, 0.0);
332 let q_warped = crate::alignment::reparameterize_curve(&qi, argvals, &gam);
333 let h = (argvals[m - 1] - argvals[0]) / (m - 1) as f64;
334 let gam_deriv = crate::helpers::gradient_uniform(&gam, h);
335
336 let mut eta = result.alpha;
337 for j in 0..m {
338 let q_aligned = q_warped[j] * gam_deriv[j].max(0.0).sqrt();
339 eta += q_aligned * result.beta[j] * weights[j];
340 }
341 let p1 = 1.0 / (1.0 + (-eta).exp());
342 vec![1.0 - p1, p1]
343 })
344 .collect()
345}