1use crate::depth::fraiman_muniz_1d;
11use crate::helpers::{l2_distance, simpsons_weights};
12use crate::matrix::FdMatrix;
13use crate::regression::fdata_to_pc_1d;
14
/// Result of fitting/evaluating a functional classifier on a dataset.
pub struct ClassifResult {
    /// Predicted class index (0-based, after label remapping) per observation.
    pub predicted: Vec<usize>,
    /// Optional per-class score matrix (n x n_classes), e.g. depth scores
    /// from the DD classifier; `None` for methods that do not produce one.
    pub probabilities: Option<FdMatrix>,
    /// Fraction of `predicted` that matches the (remapped) input labels.
    pub accuracy: f64,
    /// Confusion matrix: rows index true classes, columns predicted classes.
    pub confusion: Vec<Vec<usize>>,
    /// Number of distinct classes found in the labels.
    pub n_classes: usize,
    /// Number of feature components used (0 for methods that do not use
    /// an FPCA feature space, e.g. kernel and depth classifiers).
    pub ncomp: usize,
}
30
/// Result of k-fold cross-validation of a functional classifier.
pub struct ClassifCvResult {
    /// Mean misclassification rate across all folds.
    pub error_rate: f64,
    /// Misclassification rate of each individual fold.
    pub fold_errors: Vec<f64>,
    /// Number of components the CV was run with (currently echoes the
    /// requested `ncomp`; no per-component selection is performed).
    pub best_ncomp: usize,
}
40
/// Remaps arbitrary label values to the dense range `0..g`, where `g` is
/// the number of distinct labels. Ordering is preserved: the smallest
/// original label maps to 0, the next to 1, and so on.
///
/// Returns the remapped labels and `g`.
fn remap_labels(y: &[usize]) -> (Vec<usize>, usize) {
    let mut unique: Vec<usize> = y.to_vec();
    unique.sort_unstable();
    unique.dedup();
    let g = unique.len();
    // `unique` is sorted and deduped, so binary search replaces the
    // original O(g) linear `position` scan per element. Every value of
    // `y` is present in `unique`, so the search always succeeds; the
    // `unwrap_or(0)` fallback mirrors the original's defensive default.
    let labels: Vec<usize> = y
        .iter()
        .map(|&v| unique.binary_search(&v).unwrap_or(0))
        .collect();
    (labels, g)
}
57
/// Builds a `g`-by-`g` confusion matrix: entry `[t][p]` counts
/// observations whose true class is `t` and predicted class is `p`.
/// Pairs containing an out-of-range class index are skipped.
fn confusion_matrix(true_labels: &[usize], pred_labels: &[usize], g: usize) -> Vec<Vec<usize>> {
    let mut matrix = vec![vec![0usize; g]; g];
    true_labels
        .iter()
        .zip(pred_labels.iter())
        .filter(|(&t, &p)| t < g && p < g)
        .for_each(|(&t, &p)| matrix[t][p] += 1);
    matrix
}
68
/// Fraction of positions where the two label slices agree; 0.0 for
/// empty input (avoids 0/0).
fn compute_accuracy(true_labels: &[usize], pred_labels: &[usize]) -> f64 {
    if true_labels.is_empty() {
        return 0.0;
    }
    let matches = true_labels
        .iter()
        .zip(pred_labels.iter())
        .filter(|(t, p)| t == p)
        .count();
    matches as f64 / true_labels.len() as f64
}
82
83fn build_feature_matrix(
85 data: &FdMatrix,
86 covariates: Option<&FdMatrix>,
87 ncomp: usize,
88) -> Option<(FdMatrix, Vec<f64>, FdMatrix)> {
89 let fpca = fdata_to_pc_1d(data, ncomp)?;
90 let n = data.nrows();
91 let d_pc = fpca.scores.ncols();
92 let d_cov = covariates.map_or(0, |c| c.ncols());
93 let d = d_pc + d_cov;
94
95 let mut features = FdMatrix::zeros(n, d);
96 for i in 0..n {
97 for j in 0..d_pc {
98 features[(i, j)] = fpca.scores[(i, j)];
99 }
100 if let Some(cov) = covariates {
101 for j in 0..d_cov {
102 features[(i, d_pc + j)] = cov[(i, j)];
103 }
104 }
105 }
106
107 Some((features, fpca.mean, fpca.rotation))
108}
109
110fn class_means_and_priors(
116 features: &FdMatrix,
117 labels: &[usize],
118 g: usize,
119) -> (Vec<Vec<f64>>, Vec<usize>, Vec<f64>) {
120 let n = features.nrows();
121 let d = features.ncols();
122 let mut counts = vec![0usize; g];
123 let mut class_means = vec![vec![0.0; d]; g];
124 for i in 0..n {
125 let c = labels[i];
126 counts[c] += 1;
127 for j in 0..d {
128 class_means[c][j] += features[(i, j)];
129 }
130 }
131 for c in 0..g {
132 if counts[c] > 0 {
133 for j in 0..d {
134 class_means[c][j] /= counts[c] as f64;
135 }
136 }
137 }
138 let priors: Vec<f64> = counts.iter().map(|&c| c as f64 / n as f64).collect();
139 (class_means, counts, priors)
140}
141
142fn pooled_within_cov(
144 features: &FdMatrix,
145 labels: &[usize],
146 class_means: &[Vec<f64>],
147 g: usize,
148) -> Vec<f64> {
149 let n = features.nrows();
150 let d = features.ncols();
151 let mut cov = vec![0.0; d * d];
152 for i in 0..n {
153 let c = labels[i];
154 for r in 0..d {
155 let dr = features[(i, r)] - class_means[c][r];
156 for s in r..d {
157 let val = dr * (features[(i, s)] - class_means[c][s]);
158 cov[r * d + s] += val;
159 if r != s {
160 cov[s * d + r] += val;
161 }
162 }
163 }
164 }
165 let scale = (n - g).max(1) as f64;
166 for v in cov.iter_mut() {
167 *v /= scale;
168 }
169 for j in 0..d {
170 cov[j * d + j] += 1e-6;
171 }
172 cov
173}
174
175fn lda_params(
177 features: &FdMatrix,
178 labels: &[usize],
179 g: usize,
180) -> (Vec<Vec<f64>>, Vec<f64>, Vec<f64>) {
181 let (class_means, _counts, priors) = class_means_and_priors(features, labels, g);
182 let cov = pooled_within_cov(features, labels, &class_means, g);
183 (class_means, cov, priors)
184}
185
/// Cholesky factorization of a symmetric positive-definite `d`-by-`d`
/// matrix stored row-major in `mat`. Returns the lower-triangular factor
/// `L` (row-major) such that `mat = L * L^T`, or `None` when the matrix
/// is not positive definite.
fn cholesky_d(mat: &[f64], d: usize) -> Option<Vec<f64>> {
    let mut lower = vec![0.0; d * d];
    for col in 0..d {
        // Diagonal entry: residual after subtracting earlier columns.
        let sum_sq: f64 = (0..col).map(|k| lower[col * d + k] * lower[col * d + k]).sum();
        let pivot = mat[col * d + col] - sum_sq;
        if pivot <= 0.0 {
            return None; // not positive definite
        }
        let diag = pivot.sqrt();
        lower[col * d + col] = diag;
        // Sub-diagonal entries of this column.
        for row in (col + 1)..d {
            let dot: f64 = (0..col)
                .map(|k| lower[row * d + k] * lower[col * d + k])
                .sum();
            lower[row * d + col] = (mat[row * d + col] - dot) / diag;
        }
    }
    Some(lower)
}
209
/// Solves the lower-triangular system `L x = b` by forward substitution.
fn forward_solve(l: &[f64], b: &[f64], d: usize) -> Vec<f64> {
    let mut x = Vec::with_capacity(d);
    for i in 0..d {
        let partial: f64 = (0..i).map(|j| l[i * d + j] * x[j]).sum();
        x.push((b[i] - partial) / l[i * d + i]);
    }
    x
}
222
223fn mahalanobis_sq(x: &[f64], mu: &[f64], chol: &[f64], d: usize) -> f64 {
225 let diff: Vec<f64> = x.iter().zip(mu.iter()).map(|(&a, &b)| a - b).collect();
226 let y = forward_solve(chol, &diff, d);
227 y.iter().map(|&v| v * v).sum()
228}
229
230fn lda_predict(
232 features: &FdMatrix,
233 class_means: &[Vec<f64>],
234 cov_chol: &[f64],
235 priors: &[f64],
236 g: usize,
237) -> Vec<usize> {
238 let n = features.nrows();
239 let d = features.ncols();
240
241 (0..n)
242 .map(|i| {
243 let xi: Vec<f64> = (0..d).map(|j| features[(i, j)]).collect();
244 let mut best_class = 0;
245 let mut best_score = f64::NEG_INFINITY;
246 for c in 0..g {
247 let maha = mahalanobis_sq(&xi, &class_means[c], cov_chol, d);
248 let score = priors[c].max(1e-15).ln() - 0.5 * maha;
249 if score > best_score {
250 best_score = score;
251 best_class = c;
252 }
253 }
254 best_class
255 })
256 .collect()
257}
258
259pub fn fclassif_lda(
267 data: &FdMatrix,
268 y: &[usize],
269 covariates: Option<&FdMatrix>,
270 ncomp: usize,
271) -> Option<ClassifResult> {
272 let n = data.nrows();
273 if n == 0 || y.len() != n || ncomp == 0 {
274 return None;
275 }
276
277 let (labels, g) = remap_labels(y);
278 if g < 2 {
279 return None;
280 }
281
282 let (features, _mean, _rotation) = build_feature_matrix(data, covariates, ncomp)?;
283 let d = features.ncols();
284 let (class_means, cov, priors) = lda_params(&features, &labels, g);
285 let chol = cholesky_d(&cov, d)?;
286
287 let predicted = lda_predict(&features, &class_means, &chol, &priors, g);
288 let accuracy = compute_accuracy(&labels, &predicted);
289 let confusion = confusion_matrix(&labels, &predicted, g);
290
291 Some(ClassifResult {
292 predicted,
293 probabilities: None,
294 accuracy,
295 confusion,
296 n_classes: g,
297 ncomp: features.ncols().min(ncomp),
298 })
299}
300
301fn accumulate_class_cov(
307 features: &FdMatrix,
308 members: &[usize],
309 mean: &[f64],
310 d: usize,
311) -> Vec<f64> {
312 let mut cov = vec![0.0; d * d];
313 for &i in members {
314 for r in 0..d {
315 let dr = features[(i, r)] - mean[r];
316 for s in r..d {
317 let val = dr * (features[(i, s)] - mean[s]);
318 cov[r * d + s] += val;
319 if r != s {
320 cov[s * d + r] += val;
321 }
322 }
323 }
324 }
325 cov
326}
327
328fn qda_class_covariances(
330 features: &FdMatrix,
331 labels: &[usize],
332 class_means: &[Vec<f64>],
333 g: usize,
334) -> Vec<Vec<f64>> {
335 let n = features.nrows();
336 let d = features.ncols();
337
338 (0..g)
339 .map(|c| {
340 let members: Vec<usize> = (0..n).filter(|&i| labels[i] == c).collect();
341 let nc = members.len().max(1) as f64;
342 let mut cov = accumulate_class_cov(features, &members, &class_means[c], d);
343 for v in cov.iter_mut() {
344 *v /= nc;
345 }
346 for j in 0..d {
347 cov[j * d + j] += 1e-6;
348 }
349 cov
350 })
351 .collect()
352}
353
354fn build_qda_params(
356 features: &FdMatrix,
357 labels: &[usize],
358 g: usize,
359) -> Option<(Vec<Vec<f64>>, Vec<Vec<f64>>, Vec<f64>, Vec<f64>)> {
360 let d = features.ncols();
361 let (class_means, _counts, priors) = class_means_and_priors(features, labels, g);
362 let class_covs = qda_class_covariances(features, labels, &class_means, g);
363 let mut class_chols = Vec::with_capacity(g);
364 let mut class_log_dets = Vec::with_capacity(g);
365 for cov in &class_covs {
366 let chol = cholesky_d(cov, d)?;
367 class_log_dets.push(log_det_cholesky(&chol, d));
368 class_chols.push(chol);
369 }
370 Some((class_means, class_chols, class_log_dets, priors))
371}
372
/// Log-determinant of a matrix from its Cholesky factor `l`:
/// `log det(L L^T) = 2 * sum_i log(l_ii)`.
fn log_det_cholesky(l: &[f64], d: usize) -> f64 {
    2.0 * (0..d).map(|i| l[i * d + i].ln()).sum::<f64>()
}
381
382fn qda_predict(
384 features: &FdMatrix,
385 class_means: &[Vec<f64>],
386 class_chols: &[Vec<f64>],
387 class_log_dets: &[f64],
388 priors: &[f64],
389 g: usize,
390) -> Vec<usize> {
391 let n = features.nrows();
392 let d = features.ncols();
393
394 (0..n)
395 .map(|i| {
396 let xi: Vec<f64> = (0..d).map(|j| features[(i, j)]).collect();
397 let mut best_class = 0;
398 let mut best_score = f64::NEG_INFINITY;
399 for c in 0..g {
400 let maha = mahalanobis_sq(&xi, &class_means[c], &class_chols[c], d);
401 let score = priors[c].max(1e-15).ln() - 0.5 * (class_log_dets[c] + maha);
402 if score > best_score {
403 best_score = score;
404 best_class = c;
405 }
406 }
407 best_class
408 })
409 .collect()
410}
411
412pub fn fclassif_qda(
414 data: &FdMatrix,
415 y: &[usize],
416 covariates: Option<&FdMatrix>,
417 ncomp: usize,
418) -> Option<ClassifResult> {
419 let n = data.nrows();
420 if n == 0 || y.len() != n || ncomp == 0 {
421 return None;
422 }
423
424 let (labels, g) = remap_labels(y);
425 if g < 2 {
426 return None;
427 }
428
429 let (features, _mean, _rotation) = build_feature_matrix(data, covariates, ncomp)?;
430
431 let (class_means, class_chols, class_log_dets, priors) =
432 build_qda_params(&features, &labels, g)?;
433
434 let predicted = qda_predict(
435 &features,
436 &class_means,
437 &class_chols,
438 &class_log_dets,
439 &priors,
440 g,
441 );
442 let accuracy = compute_accuracy(&labels, &predicted);
443 let confusion = confusion_matrix(&labels, &predicted, g);
444
445 Some(ClassifResult {
446 predicted,
447 probabilities: None,
448 accuracy,
449 confusion,
450 n_classes: g,
451 ncomp: features.ncols().min(ncomp),
452 })
453}
454
455pub fn fclassif_knn(
468 data: &FdMatrix,
469 y: &[usize],
470 covariates: Option<&FdMatrix>,
471 ncomp: usize,
472 k_nn: usize,
473) -> Option<ClassifResult> {
474 let n = data.nrows();
475 if n == 0 || y.len() != n || ncomp == 0 || k_nn == 0 {
476 return None;
477 }
478
479 let (labels, g) = remap_labels(y);
480 if g < 2 {
481 return None;
482 }
483
484 let (features, _mean, _rotation) = build_feature_matrix(data, covariates, ncomp)?;
485 let d = features.ncols();
486
487 let predicted = knn_predict_loo(&features, &labels, g, d, k_nn);
488 let accuracy = compute_accuracy(&labels, &predicted);
489 let confusion = confusion_matrix(&labels, &predicted, g);
490
491 Some(ClassifResult {
492 predicted,
493 probabilities: None,
494 accuracy,
495 confusion,
496 n_classes: g,
497 ncomp: d.min(ncomp),
498 })
499}
500
501fn knn_predict_loo(
503 features: &FdMatrix,
504 labels: &[usize],
505 g: usize,
506 d: usize,
507 k_nn: usize,
508) -> Vec<usize> {
509 let n = features.nrows();
510 let k_nn = k_nn.min(n - 1);
511
512 (0..n)
513 .map(|i| {
514 let xi: Vec<f64> = (0..d).map(|j| features[(i, j)]).collect();
515 let mut dists: Vec<(f64, usize)> = (0..n)
516 .filter(|&j| j != i)
517 .map(|j| {
518 let xj: Vec<f64> = (0..d).map(|jj| features[(j, jj)]).collect();
519 let d_sq: f64 = xi.iter().zip(&xj).map(|(&a, &b)| (a - b).powi(2)).sum();
520 (d_sq, labels[j])
521 })
522 .collect();
523 dists.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap_or(std::cmp::Ordering::Equal));
524
525 let mut votes = vec![0usize; g];
527 for &(_, label) in dists.iter().take(k_nn) {
528 votes[label] += 1;
529 }
530 votes
531 .iter()
532 .enumerate()
533 .max_by_key(|&(_, &v)| v)
534 .map(|(c, _)| c)
535 .unwrap_or(0)
536 })
537 .collect()
538}
539
/// Index of the largest score. Ties resolve to the last maximal entry
/// (the behavior of `Iterator::max_by`); an empty slice yields class 0.
fn argmax_class(scores: &[f64]) -> usize {
    let cmp = |a: &f64, b: &f64| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal);
    match scores.iter().enumerate().max_by(|x, y| cmp(x.1, y.1)) {
        Some((idx, _)) => idx,
        None => 0,
    }
}
553
554fn scalar_depth_for_obs(cov: &FdMatrix, i: usize, class_indices: &[usize], p: usize) -> f64 {
556 let nc = class_indices.len() as f64;
557 if nc < 1.0 || p == 0 {
558 return 0.0;
559 }
560 let mut depth = 0.0;
561 for j in 0..p {
562 let val = cov[(i, j)];
563 let rank = class_indices
564 .iter()
565 .filter(|&&k| cov[(k, j)] <= val)
566 .count() as f64;
567 let u = rank / nc.max(1.0);
568 depth += u.min(1.0 - u).min(0.5);
569 }
570 depth / p as f64
571}
572
/// Candidate kernel bandwidths: the 5%, 10%, ..., 100% quantiles of the
/// upper-triangular pairwise distances, with (near-)zero values removed.
///
/// Returns an empty vector when there are no pairs (`n <= 1`); the
/// previous `all_dists.len() - 1` indexing would underflow and panic in
/// that case.
fn bandwidth_candidates(dists: &[f64], n: usize) -> Vec<f64> {
    let mut pair_dists: Vec<f64> = Vec::new();
    for i in 0..n {
        for j in (i + 1)..n {
            pair_dists.push(dists[i * n + j]);
        }
    }
    if pair_dists.is_empty() {
        return Vec::new();
    }
    pair_dists.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));

    let last = pair_dists.len() - 1;
    (1..=20)
        .map(|p| {
            let idx = (p as f64 / 20.0 * last as f64) as usize;
            pair_dists[idx.min(last)]
        })
        .filter(|&h| h > 1e-15)
        .collect()
}
591
592fn loo_accuracy_for_bandwidth(dists: &[f64], labels: &[usize], g: usize, n: usize, h: f64) -> f64 {
594 let mut correct = 0;
595 for i in 0..n {
596 let mut votes = vec![0.0; g];
597 for j in 0..n {
598 if j != i {
599 votes[labels[j]] += gaussian_kernel(dists[i * n + j], h);
600 }
601 }
602 if argmax_class(&votes) == labels[i] {
603 correct += 1;
604 }
605 }
606 correct as f64 / n as f64
607}
608
/// Gaussian kernel weight `exp(-dist^2 / (2 h^2))`. A degenerate
/// bandwidth (below 1e-15) contributes zero weight instead of dividing
/// by ~zero.
fn gaussian_kernel(dist: f64, h: f64) -> f64 {
    if h < 1e-15 {
        0.0
    } else {
        (-dist * dist / (2.0 * h * h)).exp()
    }
}
616
617pub fn fclassif_kernel(
629 data: &FdMatrix,
630 argvals: &[f64],
631 y: &[usize],
632 covariates: Option<&FdMatrix>,
633 h_func: f64,
634 h_scalar: f64,
635) -> Option<ClassifResult> {
636 let n = data.nrows();
637 let m = data.ncols();
638 if n == 0 || y.len() != n || argvals.len() != m {
639 return None;
640 }
641
642 let (labels, g) = remap_labels(y);
643 if g < 2 {
644 return None;
645 }
646
647 let weights = simpsons_weights(argvals);
648
649 let func_dists = compute_pairwise_l2(data, &weights);
651
652 let scalar_dists = covariates.map(compute_pairwise_scalar);
654
655 let h_f = if h_func > 0.0 {
657 h_func
658 } else {
659 select_bandwidth_loo(&func_dists, &labels, g, n, true)
660 };
661 let h_s = match &scalar_dists {
662 Some(sd) if h_scalar <= 0.0 => select_bandwidth_loo(sd, &labels, g, n, false),
663 _ => h_scalar,
664 };
665
666 let predicted = kernel_classify_loo(
667 &func_dists,
668 scalar_dists.as_deref(),
669 &labels,
670 g,
671 n,
672 h_f,
673 h_s,
674 );
675 let accuracy = compute_accuracy(&labels, &predicted);
676 let confusion = confusion_matrix(&labels, &predicted, g);
677
678 Some(ClassifResult {
679 predicted,
680 probabilities: None,
681 accuracy,
682 confusion,
683 n_classes: g,
684 ncomp: 0,
685 })
686}
687
688fn compute_pairwise_l2(data: &FdMatrix, weights: &[f64]) -> Vec<f64> {
690 let n = data.nrows();
691 let mut dists = vec![0.0; n * n];
692 for i in 0..n {
693 let ri = data.row(i);
694 for j in (i + 1)..n {
695 let rj = data.row(j);
696 let d = l2_distance(&ri, &rj, weights);
697 dists[i * n + j] = d;
698 dists[j * n + i] = d;
699 }
700 }
701 dists
702}
703
704fn compute_pairwise_scalar(covariates: &FdMatrix) -> Vec<f64> {
706 let n = covariates.nrows();
707 let p = covariates.ncols();
708 let mut dists = vec![0.0; n * n];
709 for i in 0..n {
710 for j in (i + 1)..n {
711 let mut d_sq = 0.0;
712 for k in 0..p {
713 d_sq += (covariates[(i, k)] - covariates[(j, k)]).powi(2);
714 }
715 let d = d_sq.sqrt();
716 dists[i * n + j] = d;
717 dists[j * n + i] = d;
718 }
719 }
720 dists
721}
722
723fn select_bandwidth_loo(dists: &[f64], labels: &[usize], g: usize, n: usize, is_func: bool) -> f64 {
725 let candidates = bandwidth_candidates(dists, n);
726 if candidates.is_empty() {
727 return if is_func { 1.0 } else { 0.5 };
728 }
729
730 let mut best_h = candidates[0];
731 let mut best_acc = 0.0;
732 for &h in &candidates {
733 let acc = loo_accuracy_for_bandwidth(dists, labels, g, n, h);
734 if acc > best_acc {
735 best_acc = acc;
736 best_h = h;
737 }
738 }
739 best_h
740}
741
742fn kernel_classify_loo(
744 func_dists: &[f64],
745 scalar_dists: Option<&[f64]>,
746 labels: &[usize],
747 g: usize,
748 n: usize,
749 h_func: f64,
750 h_scalar: f64,
751) -> Vec<usize> {
752 (0..n)
753 .map(|i| {
754 let mut votes = vec![0.0; g];
755 for j in 0..n {
756 if j == i {
757 continue;
758 }
759 let kf = gaussian_kernel(func_dists[i * n + j], h_func);
760 let ks = match scalar_dists {
761 Some(sd) if h_scalar > 1e-15 => gaussian_kernel(sd[i * n + j], h_scalar),
762 _ => 1.0,
763 };
764 votes[labels[j]] += kf * ks;
765 }
766 argmax_class(&votes)
767 })
768 .collect()
769}
770
771fn compute_class_depths(data: &FdMatrix, class_indices: &[Vec<usize>], n: usize) -> FdMatrix {
781 let g = class_indices.len();
782 let mut depth_scores = FdMatrix::zeros(n, g);
783 for c in 0..g {
784 if class_indices[c].is_empty() {
785 continue;
786 }
787 let class_data = extract_class_data(data, &class_indices[c]);
788 let depths = fraiman_muniz_1d(data, &class_data, true);
789 for i in 0..n {
790 depth_scores[(i, c)] = depths[i];
791 }
792 }
793 depth_scores
794}
795
796fn blend_scalar_depths(
798 depth_scores: &mut FdMatrix,
799 cov: &FdMatrix,
800 class_indices: &[Vec<usize>],
801 n: usize,
802) {
803 let g = class_indices.len();
804 let p = cov.ncols();
805 for c in 0..g {
806 for i in 0..n {
807 let sd = scalar_depth_for_obs(cov, i, &class_indices[c], p);
808 depth_scores[(i, c)] = 0.7 * depth_scores[(i, c)] + 0.3 * sd;
809 }
810 }
811}
812
813pub fn fclassif_dd(
814 data: &FdMatrix,
815 y: &[usize],
816 covariates: Option<&FdMatrix>,
817) -> Option<ClassifResult> {
818 let n = data.nrows();
819 if n == 0 || y.len() != n {
820 return None;
821 }
822
823 let (labels, g) = remap_labels(y);
824 if g < 2 {
825 return None;
826 }
827
828 let class_indices: Vec<Vec<usize>> = (0..g)
829 .map(|c| (0..n).filter(|&i| labels[i] == c).collect())
830 .collect();
831
832 let mut depth_scores = compute_class_depths(data, &class_indices, n);
833
834 if let Some(cov) = covariates {
835 blend_scalar_depths(&mut depth_scores, cov, &class_indices, n);
836 }
837
838 let predicted: Vec<usize> = (0..n)
839 .map(|i| {
840 let scores: Vec<f64> = (0..g).map(|c| depth_scores[(i, c)]).collect();
841 argmax_class(&scores)
842 })
843 .collect();
844
845 let accuracy = compute_accuracy(&labels, &predicted);
846 let confusion = confusion_matrix(&labels, &predicted, g);
847
848 Some(ClassifResult {
849 predicted,
850 probabilities: Some(depth_scores),
851 accuracy,
852 confusion,
853 n_classes: g,
854 ncomp: 0,
855 })
856}
857
858fn extract_class_data(data: &FdMatrix, indices: &[usize]) -> FdMatrix {
860 let nc = indices.len();
861 let m = data.ncols();
862 let mut result = FdMatrix::zeros(nc, m);
863 for (ri, &i) in indices.iter().enumerate() {
864 for j in 0..m {
865 result[(ri, j)] = data[(i, j)];
866 }
867 }
868 result
869}
870
871pub fn fclassif_cv(
887 data: &FdMatrix,
888 argvals: &[f64],
889 y: &[usize],
890 covariates: Option<&FdMatrix>,
891 method: &str,
892 ncomp: usize,
893 nfold: usize,
894 seed: u64,
895) -> Option<ClassifCvResult> {
896 let n = data.nrows();
897 if n < nfold || nfold < 2 {
898 return None;
899 }
900
901 let (labels, g) = remap_labels(y);
902 if g < 2 {
903 return None;
904 }
905
906 let folds = assign_folds(n, nfold, seed);
908
909 let mut fold_errors = Vec::with_capacity(nfold);
910
911 for fold in 0..nfold {
912 let (train_idx, test_idx) = fold_split(&folds, fold);
913 let train_data = extract_class_data(data, &train_idx);
914 let test_data = extract_class_data(data, &test_idx);
915 let train_labels: Vec<usize> = train_idx.iter().map(|&i| labels[i]).collect();
916 let test_labels: Vec<usize> = test_idx.iter().map(|&i| labels[i]).collect();
917
918 let train_cov = covariates.map(|c| extract_class_data(c, &train_idx));
919 let test_cov = covariates.map(|c| extract_class_data(c, &test_idx));
920
921 let predictions = cv_fold_predict(
922 &train_data,
923 &test_data,
924 argvals,
925 &train_labels,
926 train_cov.as_ref(),
927 test_cov.as_ref(),
928 method,
929 ncomp,
930 );
931
932 let n_test = test_labels.len();
933 let errors = match predictions {
934 Some(pred) => {
935 let wrong = pred
936 .iter()
937 .zip(&test_labels)
938 .filter(|(&p, &t)| p != t)
939 .count();
940 wrong as f64 / n_test as f64
941 }
942 None => 1.0,
943 };
944 fold_errors.push(errors);
945 }
946
947 let error_rate = fold_errors.iter().sum::<f64>() / nfold as f64;
948
949 Some(ClassifCvResult {
950 error_rate,
951 fold_errors,
952 best_ncomp: ncomp,
953 })
954}
955
956fn assign_folds(n: usize, nfold: usize, seed: u64) -> Vec<usize> {
958 use rand::prelude::*;
959 let mut rng = StdRng::seed_from_u64(seed);
960 let mut indices: Vec<usize> = (0..n).collect();
961 indices.shuffle(&mut rng);
962
963 let mut folds = vec![0usize; n];
964 for (rank, &idx) in indices.iter().enumerate() {
965 folds[idx] = rank % nfold;
966 }
967 folds
968}
969
/// Splits observation indices into `(train, test)` for the given fold:
/// test indices belong to `fold`, training indices to all other folds.
fn fold_split(folds: &[usize], fold: usize) -> (Vec<usize>, Vec<usize>) {
    let (test, train): (Vec<usize>, Vec<usize>) =
        (0..folds.len()).partition(|&i| folds[i] == fold);
    (train, test)
}
976
/// Trains the selected classifier on the training fold and predicts the
/// test fold. Returns `None` when training fails or `method` is unknown.
///
/// NOTE(review): the "lda"/"qda" arms first run the full in-sample fit
/// only to validate that training succeeds (result discarded via `?`),
/// then refit on the training FPCA scores for out-of-sample prediction.
/// The "kernel" and "dd" arms do NOT classify the test curves at all:
/// they repeat the first *training* prediction for every test row —
/// placeholder behavior that likely needs a real out-of-sample
/// predictor. `_test_cov` is currently unused for the same reason.
fn cv_fold_predict(
    train_data: &FdMatrix,
    test_data: &FdMatrix,
    argvals: &[f64],
    train_labels: &[usize],
    train_cov: Option<&FdMatrix>,
    _test_cov: Option<&FdMatrix>,
    method: &str,
    ncomp: usize,
) -> Option<Vec<usize>> {
    match method {
        "lda" => {
            let _result = fclassif_lda(train_data, train_labels, train_cov, ncomp)?;
            let fpca = fdata_to_pc_1d(train_data, ncomp)?;
            let predictions =
                project_and_classify_lda(test_data, &fpca, train_labels, train_cov, ncomp);
            Some(predictions)
        }
        "qda" => {
            let _result = fclassif_qda(train_data, train_labels, train_cov, ncomp)?;
            let fpca = fdata_to_pc_1d(train_data, ncomp)?;
            let predictions =
                project_and_classify_qda(test_data, &fpca, train_labels, train_cov, ncomp);
            Some(predictions)
        }
        "knn" => {
            // k is hard-coded to 5 here; the public fclassif_knn takes it
            // as a parameter.
            let fpca = fdata_to_pc_1d(train_data, ncomp)?;
            let predictions =
                project_and_classify_knn(test_data, &fpca, train_labels, train_cov, ncomp, 5);
            Some(predictions)
        }
        "kernel" => {
            // Placeholder: see NOTE(review) above.
            let result = fclassif_kernel(train_data, argvals, train_labels, train_cov, 0.0, 0.0)?;
            Some(vec![result.predicted[0]; test_data.nrows()]) }
        "dd" => {
            // Placeholder: see NOTE(review) above.
            let result = fclassif_dd(train_data, train_labels, train_cov)?;
            Some(vec![result.predicted[0]; test_data.nrows()])
        }
        _ => None,
    }
}
1023
1024fn project_test_onto_fpca(test_data: &FdMatrix, fpca: &crate::regression::FpcaResult) -> FdMatrix {
1026 let n_test = test_data.nrows();
1027 let m = test_data.ncols();
1028 let d_pc = fpca.scores.ncols();
1029 let mut test_features = FdMatrix::zeros(n_test, d_pc);
1030 for i in 0..n_test {
1031 for k in 0..d_pc {
1032 let mut score = 0.0;
1033 for j in 0..m {
1034 score += (test_data[(i, j)] - fpca.mean[j]) * fpca.rotation[(j, k)];
1035 }
1036 test_features[(i, k)] = score;
1037 }
1038 }
1039 test_features
1040}
1041
1042fn project_and_classify_lda(
1044 test_data: &FdMatrix,
1045 fpca: &crate::regression::FpcaResult,
1046 train_labels: &[usize],
1047 _train_cov: Option<&FdMatrix>,
1048 _ncomp: usize,
1049) -> Vec<usize> {
1050 let test_features = project_test_onto_fpca(test_data, fpca);
1051
1052 let train_features = &fpca.scores;
1053 let (labels, g) = remap_labels(train_labels);
1054 let (class_means, cov, priors) = lda_params(train_features, &labels, g);
1055 let d = train_features.ncols();
1056 match cholesky_d(&cov, d) {
1057 Some(chol) => lda_predict(&test_features, &class_means, &chol, &priors, g),
1058 None => vec![0; test_data.nrows()],
1059 }
1060}
1061
1062fn project_and_classify_qda(
1064 test_data: &FdMatrix,
1065 fpca: &crate::regression::FpcaResult,
1066 train_labels: &[usize],
1067 _train_cov: Option<&FdMatrix>,
1068 _ncomp: usize,
1069) -> Vec<usize> {
1070 let n_test = test_data.nrows();
1071 let test_features = project_test_onto_fpca(test_data, fpca);
1072
1073 let (labels, g) = remap_labels(train_labels);
1074 let train_features = &fpca.scores;
1075
1076 match build_qda_params(train_features, &labels, g) {
1077 Some((class_means, class_chols, class_log_dets, priors)) => qda_predict(
1078 &test_features,
1079 &class_means,
1080 &class_chols,
1081 &class_log_dets,
1082 &priors,
1083 g,
1084 ),
1085 None => vec![0; n_test],
1086 }
1087}
1088
1089fn project_and_classify_knn(
1091 test_data: &FdMatrix,
1092 fpca: &crate::regression::FpcaResult,
1093 train_labels: &[usize],
1094 _train_cov: Option<&FdMatrix>,
1095 _ncomp: usize,
1096 k_nn: usize,
1097) -> Vec<usize> {
1098 let n_test = test_data.nrows();
1099 let n_train = fpca.scores.nrows();
1100 let m = test_data.ncols();
1101 let d = fpca.scores.ncols();
1102
1103 let (labels, g) = remap_labels(train_labels);
1104
1105 (0..n_test)
1106 .map(|i| {
1107 let mut xi = vec![0.0; d];
1109 for k in 0..d {
1110 for j in 0..m {
1111 xi[k] += (test_data[(i, j)] - fpca.mean[j]) * fpca.rotation[(j, k)];
1112 }
1113 }
1114
1115 let mut dists: Vec<(f64, usize)> = (0..n_train)
1117 .map(|t| {
1118 let d_sq: f64 = (0..d).map(|k| (xi[k] - fpca.scores[(t, k)]).powi(2)).sum();
1119 (d_sq, labels[t])
1120 })
1121 .collect();
1122 dists.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap_or(std::cmp::Ordering::Equal));
1123
1124 let mut votes = vec![0usize; g];
1125 for &(_, label) in dists.iter().take(k_nn.min(n_train)) {
1126 votes[label] += 1;
1127 }
1128 votes
1129 .iter()
1130 .enumerate()
1131 .max_by_key(|&(_, &v)| v)
1132 .map(|(c, _)| c)
1133 .unwrap_or(0)
1134 })
1135 .collect()
1136}
1137
#[cfg(test)]
mod tests {
    use super::*;
    use std::f64::consts::PI;

    // Evenly spaced grid of m points on [0, 1].
    fn uniform_grid(m: usize) -> Vec<f64> {
        (0..m).map(|i| i as f64 / (m - 1) as f64).collect()
    }

    // Builds two well-separated classes (+sin vs -sin curves, n_per each)
    // sampled on an m-point grid, with a small deterministic perturbation
    // so the observations are not identical. Returns (data, labels, grid).
    fn generate_two_class_data(n_per: usize, m: usize) -> (FdMatrix, Vec<usize>, Vec<f64>) {
        let t = uniform_grid(m);
        let n = 2 * n_per;
        let mut col_major = vec![0.0; n * m];

        // Class 0: +sin(2*pi*t) curves.
        for i in 0..n_per {
            for (j, &tj) in t.iter().enumerate() {
                col_major[i + j * n] =
                    (2.0 * PI * tj).sin() + 0.05 * ((i * 7 + j * 3) % 100) as f64 / 100.0;
            }
        }
        // Class 1: -sin(2*pi*t) curves.
        for i in 0..n_per {
            for (j, &tj) in t.iter().enumerate() {
                col_major[(i + n_per) + j * n] =
                    -(2.0 * PI * tj).sin() + 0.05 * ((i * 7 + j * 3) % 100) as f64 / 100.0;
            }
        }

        let data = FdMatrix::from_column_major(col_major, n, m).unwrap();
        let labels: Vec<usize> = (0..n).map(|i| if i < n_per { 0 } else { 1 }).collect();
        (data, labels, t)
    }

    #[test]
    fn test_fclassif_lda_basic() {
        let (data, labels, _t) = generate_two_class_data(20, 50);
        let result = fclassif_lda(&data, &labels, None, 3).unwrap();

        assert_eq!(result.predicted.len(), 40);
        assert_eq!(result.n_classes, 2);
        assert!(
            result.accuracy > 0.8,
            "LDA accuracy should be high: {}",
            result.accuracy
        );
    }

    #[test]
    fn test_fclassif_qda_basic() {
        let (data, labels, _t) = generate_two_class_data(20, 50);
        let result = fclassif_qda(&data, &labels, None, 3).unwrap();

        assert_eq!(result.predicted.len(), 40);
        assert!(
            result.accuracy > 0.8,
            "QDA accuracy should be high: {}",
            result.accuracy
        );
    }

    #[test]
    fn test_fclassif_knn_basic() {
        let (data, labels, _t) = generate_two_class_data(20, 50);
        let result = fclassif_knn(&data, &labels, None, 3, 5).unwrap();

        assert_eq!(result.predicted.len(), 40);
        assert!(
            result.accuracy > 0.7,
            "k-NN accuracy should be reasonable: {}",
            result.accuracy
        );
    }

    #[test]
    fn test_fclassif_kernel_basic() {
        // Bandwidths of 0.0 exercise the automatic LOO selection path.
        let (data, labels, t) = generate_two_class_data(20, 50);
        let result = fclassif_kernel(&data, &t, &labels, None, 0.0, 0.0).unwrap();

        assert_eq!(result.predicted.len(), 40);
        assert!(
            result.accuracy > 0.7,
            "Kernel accuracy should be reasonable: {}",
            result.accuracy
        );
    }

    #[test]
    fn test_fclassif_dd_basic() {
        let (data, labels, _t) = generate_two_class_data(20, 50);
        let result = fclassif_dd(&data, &labels, None).unwrap();

        assert_eq!(result.predicted.len(), 40);
        assert_eq!(result.n_classes, 2);
        assert!(
            result.accuracy > 0.6,
            "DD accuracy should be reasonable: {}",
            result.accuracy
        );
        // DD is the one classifier that reports per-class depth scores.
        assert!(result.probabilities.is_some());
    }

    #[test]
    fn test_confusion_matrix_shape() {
        let (data, labels, _t) = generate_two_class_data(15, 50);
        let result = fclassif_lda(&data, &labels, None, 2).unwrap();

        assert_eq!(result.confusion.len(), 2);
        assert_eq!(result.confusion[0].len(), 2);
        assert_eq!(result.confusion[1].len(), 2);

        // Every observation is counted exactly once.
        let total: usize = result.confusion.iter().flat_map(|row| row.iter()).sum();
        assert_eq!(total, 30);
    }

    #[test]
    fn test_fclassif_cv_lda() {
        let (data, labels, t) = generate_two_class_data(25, 50);
        let result = fclassif_cv(&data, &t, &labels, None, "lda", 3, 5, 42).unwrap();

        assert_eq!(result.fold_errors.len(), 5);
        assert!(
            result.error_rate < 0.3,
            "CV error should be low: {}",
            result.error_rate
        );
    }

    #[test]
    fn test_fclassif_invalid_input() {
        // Empty data must be rejected.
        let data = FdMatrix::zeros(0, 0);
        assert!(fclassif_lda(&data, &[], None, 1).is_none());

        // A single class (all labels equal) must be rejected.
        let data = FdMatrix::zeros(10, 50);
        let labels = vec![0; 10]; assert!(fclassif_lda(&data, &labels, None, 1).is_none());
    }

    #[test]
    fn test_remap_labels() {
        let (mapped, g) = remap_labels(&[5, 5, 10, 10, 20]);
        assert_eq!(g, 3);
        assert_eq!(mapped, vec![0, 0, 1, 1, 2]);
    }

    #[test]
    fn test_fclassif_lda_with_covariates() {
        let n_per = 15;
        let n = 2 * n_per;
        let m = 50;
        let t = uniform_grid(m);

        // Identical curves for both classes: the functional part alone
        // carries no class information.
        let mut col_major = vec![0.0; n * m];
        for i in 0..n {
            for (j, &tj) in t.iter().enumerate() {
                col_major[i + j * n] = (2.0 * PI * tj).sin();
            }
        }
        let data = FdMatrix::from_column_major(col_major, n, m).unwrap();

        // A single scalar covariate that perfectly separates the classes.
        let mut cov_data = vec![0.0; n];
        for i in n_per..n {
            cov_data[i] = 10.0;
        }
        let covariates = FdMatrix::from_column_major(cov_data, n, 1).unwrap();

        let labels: Vec<usize> = (0..n).map(|i| if i < n_per { 0 } else { 1 }).collect();

        let result = fclassif_lda(&data, &labels, Some(&covariates), 2).unwrap();
        assert!(
            result.accuracy > 0.9,
            "Covariate should enable separation: {}",
            result.accuracy
        );
    }

    #[test]
    fn test_fclassif_three_classes() {
        let n_per = 15;
        let n = 3 * n_per;
        let m = 50;
        let t = uniform_grid(m);

        let mut col_major = vec![0.0; n * m];
        // Class 0: sin curves.
        for i in 0..n_per {
            for (j, &tj) in t.iter().enumerate() {
                col_major[i + j * n] =
                    (2.0 * PI * tj).sin() + 0.02 * ((i * 7 + j * 3) % 100) as f64 / 100.0;
            }
        }
        // Class 1: cos curves.
        for i in 0..n_per {
            for (j, &tj) in t.iter().enumerate() {
                col_major[(i + n_per) + j * n] =
                    (2.0 * PI * tj).cos() + 0.02 * ((i * 7 + j * 3) % 100) as f64 / 100.0;
            }
        }
        // Class 2: constant-level curves.
        for i in 0..n_per {
            for (j, _) in t.iter().enumerate() {
                col_major[(i + 2 * n_per) + j * n] =
                    3.0 + 0.02 * ((i * 7 + j * 3) % 100) as f64 / 100.0;
            }
        }

        let data = FdMatrix::from_column_major(col_major, n, m).unwrap();
        let labels: Vec<usize> = (0..n)
            .map(|i| {
                if i < n_per {
                    0
                } else if i < 2 * n_per {
                    1
                } else {
                    2
                }
            })
            .collect();

        let result = fclassif_lda(&data, &labels, None, 3).unwrap();
        assert_eq!(result.n_classes, 3);
        assert!(
            result.accuracy > 0.8,
            "Three-class accuracy: {}",
            result.accuracy
        );
    }
}