fdars_core/classification/
cv.rs1use crate::error::FdarError;
4use crate::matrix::FdMatrix;
5use crate::regression::fdata_to_pc_1d;
6
7use super::lda::{lda_params, lda_predict};
8use super::qda::{build_qda_params, qda_predict};
9use super::{remap_labels, ClassifCvResult};
10use crate::linalg::cholesky_d;
11
12#[must_use = "expensive computation whose result should not be discarded"]
45pub fn fclassif_cv(
46 data: &FdMatrix,
47 argvals: &[f64],
48 y: &[usize],
49 scalar_covariates: Option<&FdMatrix>,
50 method: &str,
51 ncomp: usize,
52 nfold: usize,
53 seed: u64,
54) -> Result<ClassifCvResult, FdarError> {
55 let n = data.nrows();
56 if n < nfold || nfold < 2 {
57 return Err(FdarError::InvalidParameter {
58 parameter: "nfold",
59 message: format!("need 2 <= nfold <= n, got nfold={nfold}, n={n}"),
60 });
61 }
62
63 let (labels, g) = remap_labels(y);
64 if g < 2 {
65 return Err(FdarError::InvalidParameter {
66 parameter: "y",
67 message: format!("need at least 2 classes, got {g}"),
68 });
69 }
70
71 let folds = assign_folds(n, nfold, seed);
73
74 let mut fold_errors = Vec::with_capacity(nfold);
75
76 for fold in 0..nfold {
77 let (train_idx, test_idx) = fold_split(&folds, fold);
78 let train_data = extract_class_data(data, &train_idx);
79 let test_data = extract_class_data(data, &test_idx);
80 let train_labels: Vec<usize> = train_idx.iter().map(|&i| labels[i]).collect();
81 let test_labels: Vec<usize> = test_idx.iter().map(|&i| labels[i]).collect();
82
83 let train_cov = scalar_covariates.map(|c| extract_class_data(c, &train_idx));
84 let test_cov = scalar_covariates.map(|c| extract_class_data(c, &test_idx));
85
86 let predictions = cv_fold_predict(
87 &train_data,
88 &test_data,
89 argvals,
90 &train_labels,
91 g,
92 train_cov.as_ref(),
93 test_cov.as_ref(),
94 method,
95 ncomp,
96 );
97
98 let n_test = test_labels.len();
99 let errors = match predictions {
100 Some(pred) => {
101 let wrong = pred
102 .iter()
103 .zip(&test_labels)
104 .filter(|(&p, &t)| p != t)
105 .count();
106 wrong as f64 / n_test as f64
107 }
108 None => 1.0,
109 };
110 fold_errors.push(errors);
111 }
112
113 let error_rate = fold_errors.iter().sum::<f64>() / nfold as f64;
114
115 Ok(ClassifCvResult {
116 error_rate,
117 fold_errors,
118 best_ncomp: ncomp,
119 })
120}
121
122pub(super) fn assign_folds(n: usize, nfold: usize, seed: u64) -> Vec<usize> {
124 use rand::prelude::*;
125 let mut rng = StdRng::seed_from_u64(seed);
126 let mut indices: Vec<usize> = (0..n).collect();
127 indices.shuffle(&mut rng);
128
129 let mut folds = vec![0usize; n];
130 for (rank, &idx) in indices.iter().enumerate() {
131 folds[idx] = rank % nfold;
132 }
133 folds
134}
135
136pub(super) fn fold_split(folds: &[usize], fold: usize) -> (Vec<usize>, Vec<usize>) {
138 let train: Vec<usize> = (0..folds.len()).filter(|&i| folds[i] != fold).collect();
139 let test: Vec<usize> = (0..folds.len()).filter(|&i| folds[i] == fold).collect();
140 (train, test)
141}
142
143fn cv_fold_predict(
145 train_data: &FdMatrix,
146 test_data: &FdMatrix,
147 _argvals: &[f64],
148 train_labels: &[usize],
149 g: usize,
150 train_cov: Option<&FdMatrix>,
151 test_cov: Option<&FdMatrix>,
152 method: &str,
153 ncomp: usize,
154) -> Option<Vec<usize>> {
155 let m = train_data.ncols();
156 let argvals: Vec<f64> = (0..m).map(|j| j as f64 / (m - 1).max(1) as f64).collect();
157 let fpca = fdata_to_pc_1d(train_data, ncomp, &argvals).ok()?;
158 match method {
159 "lda" => {
160 let predictions =
161 project_and_classify_lda(test_data, &fpca, train_labels, g, train_cov, test_cov);
162 Some(predictions)
163 }
164 "qda" => {
165 let predictions =
166 project_and_classify_qda(test_data, &fpca, train_labels, g, train_cov, test_cov);
167 Some(predictions)
168 }
169 "knn" => {
170 let predictions =
171 project_and_classify_knn(test_data, &fpca, train_labels, g, train_cov, test_cov, 5);
172 Some(predictions)
173 }
174 _ => None,
176 }
177}
178
179pub(super) fn project_test_onto_fpca(
181 test_data: &FdMatrix,
182 fpca: &crate::regression::FpcaResult,
183) -> FdMatrix {
184 let n_test = test_data.nrows();
185 let m = test_data.ncols();
186 let d_pc = fpca.scores.ncols();
187 let mut test_features = FdMatrix::zeros(n_test, d_pc);
188 for i in 0..n_test {
189 for k in 0..d_pc {
190 let mut score = 0.0;
191 for j in 0..m {
192 score +=
193 (test_data[(i, j)] - fpca.mean[j]) * fpca.rotation[(j, k)] * fpca.weights[j];
194 }
195 test_features[(i, k)] = score;
196 }
197 }
198 test_features
199}
200
201fn append_scalar_covariates(scores: &FdMatrix, scalar_covariates: Option<&FdMatrix>) -> FdMatrix {
203 match scalar_covariates {
204 None => scores.clone(),
205 Some(cov) => {
206 let n = scores.nrows();
207 let d_pc = scores.ncols();
208 let d_cov = cov.ncols();
209 let mut features = FdMatrix::zeros(n, d_pc + d_cov);
210 for i in 0..n {
211 for j in 0..d_pc {
212 features[(i, j)] = scores[(i, j)];
213 }
214 for j in 0..d_cov {
215 features[(i, d_pc + j)] = cov[(i, j)];
216 }
217 }
218 features
219 }
220 }
221}
222
223fn project_and_classify_lda(
225 test_data: &FdMatrix,
226 fpca: &crate::regression::FpcaResult,
227 train_labels: &[usize],
228 g: usize,
229 train_cov: Option<&FdMatrix>,
230 test_cov: Option<&FdMatrix>,
231) -> Vec<usize> {
232 let test_pc = project_test_onto_fpca(test_data, fpca);
233 let test_features = append_scalar_covariates(&test_pc, test_cov);
234
235 let train_features = append_scalar_covariates(&fpca.scores, train_cov);
236 let (class_means, cov, priors) = lda_params(&train_features, train_labels, g);
237 let d = train_features.ncols();
238 match cholesky_d(&cov, d) {
239 Ok(chol) => lda_predict(&test_features, &class_means, &chol, &priors, g),
240 Err(_) => vec![0; test_data.nrows()],
241 }
242}
243
244fn project_and_classify_qda(
246 test_data: &FdMatrix,
247 fpca: &crate::regression::FpcaResult,
248 train_labels: &[usize],
249 g: usize,
250 train_cov: Option<&FdMatrix>,
251 test_cov: Option<&FdMatrix>,
252) -> Vec<usize> {
253 let n_test = test_data.nrows();
254 let test_pc = project_test_onto_fpca(test_data, fpca);
255 let test_features = append_scalar_covariates(&test_pc, test_cov);
256
257 let train_features = append_scalar_covariates(&fpca.scores, train_cov);
258
259 match build_qda_params(&train_features, train_labels, g) {
260 Ok((class_means, class_chols, class_log_dets, priors)) => qda_predict(
261 &test_features,
262 &class_means,
263 &class_chols,
264 &class_log_dets,
265 &priors,
266 g,
267 ),
268 Err(_) => vec![0; n_test],
269 }
270}
271
272fn project_and_classify_knn(
274 test_data: &FdMatrix,
275 fpca: &crate::regression::FpcaResult,
276 train_labels: &[usize],
277 g: usize,
278 train_cov: Option<&FdMatrix>,
279 test_cov: Option<&FdMatrix>,
280 k_nn: usize,
281) -> Vec<usize> {
282 let n_test = test_data.nrows();
283 let n_train = fpca.scores.nrows();
284
285 let test_pc = project_test_onto_fpca(test_data, fpca);
286 let test_features = append_scalar_covariates(&test_pc, test_cov);
287 let train_features = append_scalar_covariates(&fpca.scores, train_cov);
288 let d = train_features.ncols();
289
290 (0..n_test)
291 .map(|i| {
292 let mut dists: Vec<(f64, usize)> = (0..n_train)
294 .map(|t| {
295 let d_sq: f64 = (0..d)
296 .map(|k| (test_features[(i, k)] - train_features[(t, k)]).powi(2))
297 .sum();
298 (d_sq, train_labels[t])
299 })
300 .collect();
301 let k_eff = k_nn.min(n_train);
302 if k_eff > 0 && k_eff < dists.len() {
303 dists.select_nth_unstable_by(k_eff - 1, |a, b| {
304 a.0.partial_cmp(&b.0).unwrap_or(std::cmp::Ordering::Equal)
305 });
306 }
307
308 let mut votes = vec![0usize; g];
309 for &(_, label) in dists.iter().take(k_eff) {
310 votes[label] += 1;
311 }
312 votes
313 .iter()
314 .enumerate()
315 .max_by_key(|&(_, &v)| v)
316 .map_or(0, |(c, _)| c)
317 })
318 .collect()
319}
320
321pub(super) fn extract_class_data(data: &FdMatrix, indices: &[usize]) -> FdMatrix {
323 let nc = indices.len();
324 let m = data.ncols();
325 let mut result = FdMatrix::zeros(nc, m);
326 for (ri, &i) in indices.iter().enumerate() {
327 for j in 0..m {
328 result[(ri, j)] = data[(i, j)];
329 }
330 }
331 result
332}