fdars_core/classification/
lda.rs1use crate::error::FdarError;
4use crate::linalg::{cholesky_d, mahalanobis_sq};
5use crate::matrix::FdMatrix;
6
7use super::{
8 build_feature_matrix, class_means_and_priors, compute_accuracy, confusion_matrix, remap_labels,
9 ClassifResult,
10};
11
12fn pooled_within_cov(
14 features: &FdMatrix,
15 labels: &[usize],
16 class_means: &[Vec<f64>],
17 g: usize,
18) -> Vec<f64> {
19 let n = features.nrows();
20 let d = features.ncols();
21 let mut cov = vec![0.0; d * d];
22 for i in 0..n {
23 let c = labels[i];
24 for r in 0..d {
25 let dr = features[(i, r)] - class_means[c][r];
26 for s in r..d {
27 let val = dr * (features[(i, s)] - class_means[c][s]);
28 cov[r * d + s] += val;
29 if r != s {
30 cov[s * d + r] += val;
31 }
32 }
33 }
34 }
35 let scale = (n - g).max(1) as f64;
36 for v in &mut cov {
37 *v /= scale;
38 }
39 for j in 0..d {
40 cov[j * d + j] += 1e-6;
41 }
42 cov
43}
44
45pub(crate) fn lda_params(
47 features: &FdMatrix,
48 labels: &[usize],
49 g: usize,
50) -> (Vec<Vec<f64>>, Vec<f64>, Vec<f64>) {
51 let (class_means, _counts, priors) = class_means_and_priors(features, labels, g);
52 let cov = pooled_within_cov(features, labels, &class_means, g);
53 (class_means, cov, priors)
54}
55
56pub(crate) fn lda_predict(
58 features: &FdMatrix,
59 class_means: &[Vec<f64>],
60 cov_chol: &[f64],
61 priors: &[f64],
62 g: usize,
63) -> Vec<usize> {
64 let n = features.nrows();
65 let d = features.ncols();
66
67 (0..n)
68 .map(|i| {
69 let xi: Vec<f64> = (0..d).map(|j| features[(i, j)]).collect();
70 let mut best_class = 0;
71 let mut best_score = f64::NEG_INFINITY;
72 for c in 0..g {
73 let maha = mahalanobis_sq(&xi, &class_means[c], cov_chol, d);
74 let score = priors[c].max(1e-15).ln() - 0.5 * maha;
75 if score > best_score {
76 best_score = score;
77 best_class = c;
78 }
79 }
80 best_class
81 })
82 .collect()
83}
84
85#[must_use = "expensive computation whose result should not be discarded"]
117pub fn fclassif_lda(
118 data: &FdMatrix,
119 y: &[usize],
120 scalar_covariates: Option<&FdMatrix>,
121 ncomp: usize,
122) -> Result<ClassifResult, FdarError> {
123 let n = data.nrows();
124 if n == 0 || y.len() != n {
125 return Err(FdarError::InvalidDimension {
126 parameter: "data/y",
127 expected: "n > 0 and y.len() == n".to_string(),
128 actual: format!("n={}, y.len()={}", n, y.len()),
129 });
130 }
131 if ncomp == 0 {
132 return Err(FdarError::InvalidParameter {
133 parameter: "ncomp",
134 message: "must be > 0".to_string(),
135 });
136 }
137
138 let (labels, g) = remap_labels(y);
139 if g < 2 {
140 return Err(FdarError::InvalidParameter {
141 parameter: "y",
142 message: format!("need at least 2 classes, got {g}"),
143 });
144 }
145
146 let (features, _mean, _rotation) = build_feature_matrix(data, scalar_covariates, ncomp)?;
147 let d = features.ncols();
148 let (class_means, cov, priors) = lda_params(&features, &labels, g);
149 let chol = cholesky_d(&cov, d)?;
150
151 let predicted = lda_predict(&features, &class_means, &chol, &priors, g);
152 let accuracy = compute_accuracy(&labels, &predicted);
153 let confusion = confusion_matrix(&labels, &predicted, g);
154
155 Ok(ClassifResult {
156 predicted,
157 probabilities: None,
158 accuracy,
159 confusion,
160 n_classes: g,
161 ncomp: features.ncols().min(ncomp),
162 })
163}