// fdars_core/classification/lda.rs
//! LDA: Linear Discriminant Analysis internals.

use crate::error::FdarError;
use crate::linalg::{cholesky_d, mahalanobis_sq};
use crate::matrix::FdMatrix;

use super::{
    build_feature_matrix, class_means_and_priors, compute_accuracy, confusion_matrix, remap_labels,
    ClassifResult,
};

12/// Compute pooled within-class covariance (symmetric, regularized).
13fn pooled_within_cov(
14    features: &FdMatrix,
15    labels: &[usize],
16    class_means: &[Vec<f64>],
17    g: usize,
18) -> Vec<f64> {
19    let n = features.nrows();
20    let d = features.ncols();
21    let mut cov = vec![0.0; d * d];
22    for i in 0..n {
23        let c = labels[i];
24        for r in 0..d {
25            let dr = features[(i, r)] - class_means[c][r];
26            for s in r..d {
27                let val = dr * (features[(i, s)] - class_means[c][s]);
28                cov[r * d + s] += val;
29                if r != s {
30                    cov[s * d + r] += val;
31                }
32            }
33        }
34    }
35    let scale = (n - g).max(1) as f64;
36    for v in cov.iter_mut() {
37        *v /= scale;
38    }
39    for j in 0..d {
40        cov[j * d + j] += 1e-6;
41    }
42    cov
43}
44
45/// Compute per-class means and pooled within-class covariance.
46pub(crate) fn lda_params(
47    features: &FdMatrix,
48    labels: &[usize],
49    g: usize,
50) -> (Vec<Vec<f64>>, Vec<f64>, Vec<f64>) {
51    let (class_means, _counts, priors) = class_means_and_priors(features, labels, g);
52    let cov = pooled_within_cov(features, labels, &class_means, g);
53    (class_means, cov, priors)
54}
55
56/// LDA prediction: assign to class minimizing Mahalanobis distance (with prior).
57pub(crate) fn lda_predict(
58    features: &FdMatrix,
59    class_means: &[Vec<f64>],
60    cov_chol: &[f64],
61    priors: &[f64],
62    g: usize,
63) -> Vec<usize> {
64    let n = features.nrows();
65    let d = features.ncols();
66
67    (0..n)
68        .map(|i| {
69            let xi: Vec<f64> = (0..d).map(|j| features[(i, j)]).collect();
70            let mut best_class = 0;
71            let mut best_score = f64::NEG_INFINITY;
72            for c in 0..g {
73                let maha = mahalanobis_sq(&xi, &class_means[c], cov_chol, d);
74                let score = priors[c].max(1e-15).ln() - 0.5 * maha;
75                if score > best_score {
76                    best_score = score;
77                    best_class = c;
78                }
79            }
80            best_class
81        })
82        .collect()
83}
84
85/// FPC + LDA classification.
86///
87/// # Arguments
88/// * `data` — Functional data (n × m)
89/// * `y` — Class labels (length n)
90/// * `scalar_covariates` — Optional scalar covariates (n × p)
91/// * `ncomp` — Number of FPC components
92///
93/// # Errors
94///
95/// Returns [`FdarError::InvalidDimension`] if `data` has zero rows or `y.len() != n`.
96/// Returns [`FdarError::InvalidParameter`] if `ncomp` is zero.
97/// Returns [`FdarError::InvalidParameter`] if `y` contains fewer than 2 distinct classes.
98/// Returns [`FdarError::ComputationFailed`] if the SVD decomposition in FPCA fails.
99/// Returns [`FdarError::ComputationFailed`] if the pooled covariance Cholesky factorization fails.
100#[must_use = "expensive computation whose result should not be discarded"]
101pub fn fclassif_lda(
102    data: &FdMatrix,
103    y: &[usize],
104    scalar_covariates: Option<&FdMatrix>,
105    ncomp: usize,
106) -> Result<ClassifResult, FdarError> {
107    let n = data.nrows();
108    if n == 0 || y.len() != n {
109        return Err(FdarError::InvalidDimension {
110            parameter: "data/y",
111            expected: "n > 0 and y.len() == n".to_string(),
112            actual: format!("n={}, y.len()={}", n, y.len()),
113        });
114    }
115    if ncomp == 0 {
116        return Err(FdarError::InvalidParameter {
117            parameter: "ncomp",
118            message: "must be > 0".to_string(),
119        });
120    }
121
122    let (labels, g) = remap_labels(y);
123    if g < 2 {
124        return Err(FdarError::InvalidParameter {
125            parameter: "y",
126            message: format!("need at least 2 classes, got {g}"),
127        });
128    }
129
130    let (features, _mean, _rotation) = build_feature_matrix(data, scalar_covariates, ncomp)?;
131    let d = features.ncols();
132    let (class_means, cov, priors) = lda_params(&features, &labels, g);
133    let chol = cholesky_d(&cov, d)?;
134
135    let predicted = lda_predict(&features, &class_means, &chol, &priors, g);
136    let accuracy = compute_accuracy(&labels, &predicted);
137    let confusion = confusion_matrix(&labels, &predicted, g);
138
139    Ok(ClassifResult {
140        predicted,
141        probabilities: None,
142        accuracy,
143        confusion,
144        n_classes: g,
145        ncomp: features.ncols().min(ncomp),
146    })
147}