1use crate::classification::{
4 classif_predict_probs, fclassif_knn_fit, fclassif_lda_fit, fclassif_qda_fit, ClassifFit,
5};
6use crate::cv::subset_vec;
7use crate::elastic_regression::elastic_logistic;
8use crate::error::FdarError;
9use crate::explain::{project_scores, subsample_rows};
10use crate::explain_generic::FpcPredictor;
11use crate::matrix::FdMatrix;
12use crate::scalar_on_function::functional_logistic;
13
14use super::{
15 argmax, build_classification_result, compute_cal_scores, conformal_split, subset_vec_i8,
16 subset_vec_usize, validate_split_inputs, ClassificationScore, ConformalClassificationResult,
17 ConformalMethod,
18};
19
20#[must_use = "expensive computation whose result should not be discarded"]
40pub fn conformal_classif(
41 data: &FdMatrix,
42 y: &[usize],
43 test_data: &FdMatrix,
44 covariates_train: Option<&FdMatrix>,
45 _covariates_test: Option<&FdMatrix>,
46 ncomp: usize,
47 classifier: &str,
48 k_nn: usize,
49 score_type: ClassificationScore,
50 cal_fraction: f64,
51 alpha: f64,
52 seed: u64,
53) -> Result<ConformalClassificationResult, FdarError> {
54 let n = data.nrows();
55 validate_split_inputs(n, test_data.nrows(), cal_fraction, alpha)?;
56 if y.len() != n {
57 return Err(FdarError::InvalidDimension {
58 parameter: "y",
59 expected: format!("{n}"),
60 actual: format!("{}", y.len()),
61 });
62 }
63 if data.ncols() != test_data.ncols() {
64 return Err(FdarError::InvalidDimension {
65 parameter: "test_data",
66 expected: format!("{} columns", data.ncols()),
67 actual: format!("{} columns", test_data.ncols()),
68 });
69 }
70
71 let (proper_idx, cal_idx) = conformal_split(n, cal_fraction, seed);
72
73 let proper_data = subsample_rows(data, &proper_idx);
74 let proper_y = subset_vec_usize(y, &proper_idx);
75 let proper_cov = covariates_train.map(|c| subsample_rows(c, &proper_idx));
76
77 let fit: ClassifFit = match classifier {
79 "lda" => fclassif_lda_fit(&proper_data, &proper_y, proper_cov.as_ref(), ncomp)?,
80 "qda" => fclassif_qda_fit(&proper_data, &proper_y, proper_cov.as_ref(), ncomp)?,
81 "knn" => fclassif_knn_fit(&proper_data, &proper_y, proper_cov.as_ref(), ncomp, k_nn)?,
82 _ => {
83 return Err(FdarError::InvalidParameter {
84 parameter: "classifier",
85 message: format!(
86 "unknown classifier '{classifier}', expected 'lda', 'qda', or 'knn'"
87 ),
88 })
89 }
90 };
91
92 let cal_data = subsample_rows(data, &cal_idx);
94 let cal_scores_mat = fit.project(&cal_data);
95 let cal_probs = classif_predict_probs(&fit, &cal_scores_mat);
96 let cal_true = subset_vec_usize(y, &cal_idx);
97 let cal_scores = compute_cal_scores(&cal_probs, &cal_true, score_type);
98
99 let test_scores_mat = fit.project(test_data);
101 let test_probs = classif_predict_probs(&fit, &test_scores_mat);
102 let test_pred_classes: Vec<usize> = test_probs.iter().map(|p| argmax(p)).collect();
103
104 Ok(build_classification_result(
105 cal_scores,
106 &test_probs,
107 test_pred_classes,
108 alpha,
109 ConformalMethod::Split,
110 score_type,
111 ))
112}
113
114#[must_use = "expensive computation whose result should not be discarded"]
128pub fn conformal_logistic(
129 data: &FdMatrix,
130 y: &[f64],
131 test_data: &FdMatrix,
132 scalar_train: Option<&FdMatrix>,
133 scalar_test: Option<&FdMatrix>,
134 ncomp: usize,
135 max_iter: usize,
136 tol: f64,
137 score_type: ClassificationScore,
138 cal_fraction: f64,
139 alpha: f64,
140 seed: u64,
141) -> Result<ConformalClassificationResult, FdarError> {
142 let n = data.nrows();
143 validate_split_inputs(n, test_data.nrows(), cal_fraction, alpha)?;
144 if y.len() != n {
145 return Err(FdarError::InvalidDimension {
146 parameter: "y",
147 expected: format!("{n}"),
148 actual: format!("{}", y.len()),
149 });
150 }
151 if data.ncols() != test_data.ncols() {
152 return Err(FdarError::InvalidDimension {
153 parameter: "test_data",
154 expected: format!("{} columns", data.ncols()),
155 actual: format!("{} columns", test_data.ncols()),
156 });
157 }
158
159 let (proper_idx, cal_idx) = conformal_split(n, cal_fraction, seed);
160 if proper_idx.len() < ncomp + 2 {
161 return Err(FdarError::InvalidParameter {
162 parameter: "ncomp",
163 message: format!(
164 "proper training set size {} too small for ncomp={}",
165 proper_idx.len(),
166 ncomp
167 ),
168 });
169 }
170
171 let proper_data = subsample_rows(data, &proper_idx);
172 let proper_y = subset_vec(y, &proper_idx);
173 let proper_sc = scalar_train.map(|sc| subsample_rows(sc, &proper_idx));
174
175 let refit = functional_logistic(
176 &proper_data,
177 &proper_y,
178 proper_sc.as_ref(),
179 ncomp,
180 max_iter,
181 tol,
182 )?;
183
184 let cal_data = subsample_rows(data, &cal_idx);
186 let cal_sc = scalar_train.map(|sc| subsample_rows(sc, &cal_idx));
187 let cal_scores_mat = project_scores(
188 &cal_data,
189 &refit.fpca.mean,
190 &refit.fpca.rotation,
191 refit.ncomp,
192 &refit.fpca.weights,
193 );
194 let cal_probs = logistic_probs_from_scores(&refit, &cal_scores_mat, cal_sc.as_ref());
195 let cal_true: Vec<usize> = cal_idx.iter().map(|&i| y[i] as usize).collect();
196 let cal_scores = compute_cal_scores(&cal_probs, &cal_true, score_type);
197
198 let test_scores_mat = project_scores(
200 test_data,
201 &refit.fpca.mean,
202 &refit.fpca.rotation,
203 refit.ncomp,
204 &refit.fpca.weights,
205 );
206 let test_probs = logistic_probs_from_scores(&refit, &test_scores_mat, scalar_test);
207 let test_pred_classes: Vec<usize> = test_probs.iter().map(|p| argmax(p)).collect();
208
209 Ok(build_classification_result(
210 cal_scores,
211 &test_probs,
212 test_pred_classes,
213 alpha,
214 ConformalMethod::Split,
215 score_type,
216 ))
217}
218
219#[must_use = "expensive computation whose result should not be discarded"]
231pub fn conformal_elastic_logistic(
232 data: &FdMatrix,
233 y: &[i8],
234 test_data: &FdMatrix,
235 argvals: &[f64],
236 lambda: f64,
237 score_type: ClassificationScore,
238 cal_fraction: f64,
239 alpha: f64,
240 seed: u64,
241) -> Result<ConformalClassificationResult, FdarError> {
242 let n = data.nrows();
243 validate_split_inputs(n, test_data.nrows(), cal_fraction, alpha)?;
244 if y.len() != n {
245 return Err(FdarError::InvalidDimension {
246 parameter: "y",
247 expected: format!("{n}"),
248 actual: format!("{}", y.len()),
249 });
250 }
251 if data.ncols() != test_data.ncols() {
252 return Err(FdarError::InvalidDimension {
253 parameter: "test_data",
254 expected: format!("{} columns", data.ncols()),
255 actual: format!("{} columns", test_data.ncols()),
256 });
257 }
258
259 let (proper_idx, cal_idx) = conformal_split(n, cal_fraction, seed);
260
261 let proper_data = subsample_rows(data, &proper_idx);
262 let proper_y = subset_vec_i8(y, &proper_idx);
263
264 let refit = elastic_logistic(&proper_data, &proper_y, argvals, 20, lambda, 50, 1e-4)?;
265
266 let cal_data = subsample_rows(data, &cal_idx);
268 let cal_probs = predict_elastic_logistic_probs(&refit, &cal_data, argvals);
269 let cal_true: Vec<usize> = cal_idx.iter().map(|&i| usize::from(y[i] == 1)).collect();
270 let cal_scores = compute_cal_scores(&cal_probs, &cal_true, score_type);
271
272 let test_probs = predict_elastic_logistic_probs(&refit, test_data, argvals);
274 let test_pred_classes: Vec<usize> = test_probs.iter().map(|p| argmax(p)).collect();
275
276 Ok(build_classification_result(
277 cal_scores,
278 &test_probs,
279 test_pred_classes,
280 alpha,
281 ConformalMethod::Split,
282 score_type,
283 ))
284}
285
286fn logistic_probs_from_scores(
292 fit: &crate::scalar_on_function::FunctionalLogisticResult,
293 scores: &FdMatrix,
294 scalar_covariates: Option<&FdMatrix>,
295) -> Vec<Vec<f64>> {
296 let n = scores.nrows();
297 let ncomp = fit.ncomp;
298 (0..n)
299 .map(|i| {
300 let s: Vec<f64> = (0..ncomp).map(|k| scores[(i, k)]).collect();
301 let sc_row: Option<Vec<f64>> =
302 scalar_covariates.map(|sc| (0..sc.ncols()).map(|j| sc[(i, j)]).collect());
303 let mut eta = fit.coefficients[0];
304 for k in 0..ncomp {
305 eta += fit.coefficients[1 + k] * s[k];
306 }
307 if let Some(ref sc) = sc_row {
308 for (j, &v) in sc.iter().enumerate() {
309 if j < fit.gamma.len() {
310 eta += fit.gamma[j] * v;
311 }
312 }
313 }
314 let p1 = crate::scalar_on_function::sigmoid(eta);
315 vec![1.0 - p1, p1]
316 })
317 .collect()
318}
319
320fn predict_elastic_logistic_probs(
322 result: &crate::elastic_regression::ElasticLogisticResult,
323 new_data: &FdMatrix,
324 argvals: &[f64],
325) -> Vec<Vec<f64>> {
326 let (n_new, m) = new_data.shape();
327 let weights = crate::helpers::simpsons_weights(argvals);
328 let q_new = crate::alignment::srsf_transform(new_data, argvals);
329
330 (0..n_new)
331 .map(|i| {
332 let qi: Vec<f64> = (0..m).map(|j| q_new[(i, j)]).collect();
333 let gam = crate::alignment::dp_alignment_core(&result.beta, &qi, argvals, 0.0);
334 let q_warped = crate::alignment::reparameterize_curve(&qi, argvals, &gam);
335 let h = (argvals[m - 1] - argvals[0]) / (m - 1) as f64;
336 let gam_deriv = crate::helpers::gradient_uniform(&gam, h);
337
338 let mut eta = result.alpha;
339 for j in 0..m {
340 let q_aligned = q_warped[j] * gam_deriv[j].max(0.0).sqrt();
341 eta += q_aligned * result.beta[j] * weights[j];
342 }
343 let p1 = 1.0 / (1.0 + (-eta).exp());
344 vec![1.0 - p1, p1]
345 })
346 .collect()
347}