// fdars_core/classification/cv.rs
use crate::error::FdarError;
4use crate::matrix::FdMatrix;
5use crate::regression::fdata_to_pc_1d;
6
7use super::lda::{lda_params, lda_predict};
8use super::qda::{build_qda_params, qda_predict};
9use super::{remap_labels, ClassifCvResult};
10use crate::linalg::cholesky_d;
11
12#[must_use = "expensive computation whose result should not be discarded"]
29pub fn fclassif_cv(
30 data: &FdMatrix,
31 argvals: &[f64],
32 y: &[usize],
33 scalar_covariates: Option<&FdMatrix>,
34 method: &str,
35 ncomp: usize,
36 nfold: usize,
37 seed: u64,
38) -> Result<ClassifCvResult, FdarError> {
39 let n = data.nrows();
40 if n < nfold || nfold < 2 {
41 return Err(FdarError::InvalidParameter {
42 parameter: "nfold",
43 message: format!("need 2 <= nfold <= n, got nfold={nfold}, n={n}"),
44 });
45 }
46
47 let (labels, g) = remap_labels(y);
48 if g < 2 {
49 return Err(FdarError::InvalidParameter {
50 parameter: "y",
51 message: format!("need at least 2 classes, got {g}"),
52 });
53 }
54
55 let folds = assign_folds(n, nfold, seed);
57
58 let mut fold_errors = Vec::with_capacity(nfold);
59
60 for fold in 0..nfold {
61 let (train_idx, test_idx) = fold_split(&folds, fold);
62 let train_data = extract_class_data(data, &train_idx);
63 let test_data = extract_class_data(data, &test_idx);
64 let train_labels: Vec<usize> = train_idx.iter().map(|&i| labels[i]).collect();
65 let test_labels: Vec<usize> = test_idx.iter().map(|&i| labels[i]).collect();
66
67 let train_cov = scalar_covariates.map(|c| extract_class_data(c, &train_idx));
68 let test_cov = scalar_covariates.map(|c| extract_class_data(c, &test_idx));
69
70 let predictions = cv_fold_predict(
71 &train_data,
72 &test_data,
73 argvals,
74 &train_labels,
75 g,
76 train_cov.as_ref(),
77 test_cov.as_ref(),
78 method,
79 ncomp,
80 );
81
82 let n_test = test_labels.len();
83 let errors = match predictions {
84 Some(pred) => {
85 let wrong = pred
86 .iter()
87 .zip(&test_labels)
88 .filter(|(&p, &t)| p != t)
89 .count();
90 wrong as f64 / n_test as f64
91 }
92 None => 1.0,
93 };
94 fold_errors.push(errors);
95 }
96
97 let error_rate = fold_errors.iter().sum::<f64>() / nfold as f64;
98
99 Ok(ClassifCvResult {
100 error_rate,
101 fold_errors,
102 best_ncomp: ncomp,
103 })
104}
105
106pub(super) fn assign_folds(n: usize, nfold: usize, seed: u64) -> Vec<usize> {
108 use rand::prelude::*;
109 let mut rng = StdRng::seed_from_u64(seed);
110 let mut indices: Vec<usize> = (0..n).collect();
111 indices.shuffle(&mut rng);
112
113 let mut folds = vec![0usize; n];
114 for (rank, &idx) in indices.iter().enumerate() {
115 folds[idx] = rank % nfold;
116 }
117 folds
118}
119
120pub(super) fn fold_split(folds: &[usize], fold: usize) -> (Vec<usize>, Vec<usize>) {
122 let train: Vec<usize> = (0..folds.len()).filter(|&i| folds[i] != fold).collect();
123 let test: Vec<usize> = (0..folds.len()).filter(|&i| folds[i] == fold).collect();
124 (train, test)
125}
126
127fn cv_fold_predict(
129 train_data: &FdMatrix,
130 test_data: &FdMatrix,
131 _argvals: &[f64],
132 train_labels: &[usize],
133 g: usize,
134 train_cov: Option<&FdMatrix>,
135 test_cov: Option<&FdMatrix>,
136 method: &str,
137 ncomp: usize,
138) -> Option<Vec<usize>> {
139 let fpca = fdata_to_pc_1d(train_data, ncomp).ok()?;
140 match method {
141 "lda" => {
142 let predictions =
143 project_and_classify_lda(test_data, &fpca, train_labels, g, train_cov, test_cov);
144 Some(predictions)
145 }
146 "qda" => {
147 let predictions =
148 project_and_classify_qda(test_data, &fpca, train_labels, g, train_cov, test_cov);
149 Some(predictions)
150 }
151 "knn" => {
152 let predictions =
153 project_and_classify_knn(test_data, &fpca, train_labels, g, train_cov, test_cov, 5);
154 Some(predictions)
155 }
156 "kernel" | "dd" => None,
158 _ => None,
159 }
160}
161
162pub(super) fn project_test_onto_fpca(
164 test_data: &FdMatrix,
165 fpca: &crate::regression::FpcaResult,
166) -> FdMatrix {
167 let n_test = test_data.nrows();
168 let m = test_data.ncols();
169 let d_pc = fpca.scores.ncols();
170 let mut test_features = FdMatrix::zeros(n_test, d_pc);
171 for i in 0..n_test {
172 for k in 0..d_pc {
173 let mut score = 0.0;
174 for j in 0..m {
175 score += (test_data[(i, j)] - fpca.mean[j]) * fpca.rotation[(j, k)];
176 }
177 test_features[(i, k)] = score;
178 }
179 }
180 test_features
181}
182
183fn append_scalar_covariates(scores: &FdMatrix, scalar_covariates: Option<&FdMatrix>) -> FdMatrix {
185 match scalar_covariates {
186 None => scores.clone(),
187 Some(cov) => {
188 let n = scores.nrows();
189 let d_pc = scores.ncols();
190 let d_cov = cov.ncols();
191 let mut features = FdMatrix::zeros(n, d_pc + d_cov);
192 for i in 0..n {
193 for j in 0..d_pc {
194 features[(i, j)] = scores[(i, j)];
195 }
196 for j in 0..d_cov {
197 features[(i, d_pc + j)] = cov[(i, j)];
198 }
199 }
200 features
201 }
202 }
203}
204
205fn project_and_classify_lda(
207 test_data: &FdMatrix,
208 fpca: &crate::regression::FpcaResult,
209 train_labels: &[usize],
210 g: usize,
211 train_cov: Option<&FdMatrix>,
212 test_cov: Option<&FdMatrix>,
213) -> Vec<usize> {
214 let test_pc = project_test_onto_fpca(test_data, fpca);
215 let test_features = append_scalar_covariates(&test_pc, test_cov);
216
217 let train_features = append_scalar_covariates(&fpca.scores, train_cov);
218 let (class_means, cov, priors) = lda_params(&train_features, train_labels, g);
219 let d = train_features.ncols();
220 match cholesky_d(&cov, d) {
221 Ok(chol) => lda_predict(&test_features, &class_means, &chol, &priors, g),
222 Err(_) => vec![0; test_data.nrows()],
223 }
224}
225
226fn project_and_classify_qda(
228 test_data: &FdMatrix,
229 fpca: &crate::regression::FpcaResult,
230 train_labels: &[usize],
231 g: usize,
232 train_cov: Option<&FdMatrix>,
233 test_cov: Option<&FdMatrix>,
234) -> Vec<usize> {
235 let n_test = test_data.nrows();
236 let test_pc = project_test_onto_fpca(test_data, fpca);
237 let test_features = append_scalar_covariates(&test_pc, test_cov);
238
239 let train_features = append_scalar_covariates(&fpca.scores, train_cov);
240
241 match build_qda_params(&train_features, train_labels, g) {
242 Ok((class_means, class_chols, class_log_dets, priors)) => qda_predict(
243 &test_features,
244 &class_means,
245 &class_chols,
246 &class_log_dets,
247 &priors,
248 g,
249 ),
250 Err(_) => vec![0; n_test],
251 }
252}
253
254fn project_and_classify_knn(
256 test_data: &FdMatrix,
257 fpca: &crate::regression::FpcaResult,
258 train_labels: &[usize],
259 g: usize,
260 train_cov: Option<&FdMatrix>,
261 test_cov: Option<&FdMatrix>,
262 k_nn: usize,
263) -> Vec<usize> {
264 let n_test = test_data.nrows();
265 let n_train = fpca.scores.nrows();
266
267 let test_pc = project_test_onto_fpca(test_data, fpca);
268 let test_features = append_scalar_covariates(&test_pc, test_cov);
269 let train_features = append_scalar_covariates(&fpca.scores, train_cov);
270 let d = train_features.ncols();
271
272 (0..n_test)
273 .map(|i| {
274 let mut dists: Vec<(f64, usize)> = (0..n_train)
276 .map(|t| {
277 let d_sq: f64 = (0..d)
278 .map(|k| (test_features[(i, k)] - train_features[(t, k)]).powi(2))
279 .sum();
280 (d_sq, train_labels[t])
281 })
282 .collect();
283 let k_eff = k_nn.min(n_train);
284 if k_eff > 0 && k_eff < dists.len() {
285 dists.select_nth_unstable_by(k_eff - 1, |a, b| {
286 a.0.partial_cmp(&b.0).unwrap_or(std::cmp::Ordering::Equal)
287 });
288 }
289
290 let mut votes = vec![0usize; g];
291 for &(_, label) in dists.iter().take(k_eff) {
292 votes[label] += 1;
293 }
294 votes
295 .iter()
296 .enumerate()
297 .max_by_key(|&(_, &v)| v)
298 .map_or(0, |(c, _)| c)
299 })
300 .collect()
301}
302
303pub(super) fn extract_class_data(data: &FdMatrix, indices: &[usize]) -> FdMatrix {
305 let nc = indices.len();
306 let m = data.ncols();
307 let mut result = FdMatrix::zeros(nc, m);
308 for (ri, &i) in indices.iter().enumerate() {
309 for j in 0..m {
310 result[(ri, j)] = data[(i, j)];
311 }
312 }
313 result
314}