Skip to main content

fdars_core/classification/
lda.rs

1//! LDA: Linear Discriminant Analysis internals.
2
3use crate::error::FdarError;
4use crate::linalg::{cholesky_d, mahalanobis_sq};
5use crate::matrix::FdMatrix;
6
7use super::{
8    build_feature_matrix, class_means_and_priors, compute_accuracy, confusion_matrix, remap_labels,
9    ClassifResult,
10};
11
12/// Compute pooled within-class covariance (symmetric, regularized).
13fn pooled_within_cov(
14    features: &FdMatrix,
15    labels: &[usize],
16    class_means: &[Vec<f64>],
17    g: usize,
18) -> Vec<f64> {
19    let n = features.nrows();
20    let d = features.ncols();
21    let mut cov = vec![0.0; d * d];
22    for i in 0..n {
23        let c = labels[i];
24        for r in 0..d {
25            let dr = features[(i, r)] - class_means[c][r];
26            for s in r..d {
27                let val = dr * (features[(i, s)] - class_means[c][s]);
28                cov[r * d + s] += val;
29                if r != s {
30                    cov[s * d + r] += val;
31                }
32            }
33        }
34    }
35    let scale = (n - g).max(1) as f64;
36    for v in &mut cov {
37        *v /= scale;
38    }
39    for j in 0..d {
40        cov[j * d + j] += 1e-6;
41    }
42    cov
43}
44
45/// Compute per-class means and pooled within-class covariance.
46pub(crate) fn lda_params(
47    features: &FdMatrix,
48    labels: &[usize],
49    g: usize,
50) -> (Vec<Vec<f64>>, Vec<f64>, Vec<f64>) {
51    let (class_means, _counts, priors) = class_means_and_priors(features, labels, g);
52    let cov = pooled_within_cov(features, labels, &class_means, g);
53    (class_means, cov, priors)
54}
55
56/// LDA prediction: assign to class minimizing Mahalanobis distance (with prior).
57pub(crate) fn lda_predict(
58    features: &FdMatrix,
59    class_means: &[Vec<f64>],
60    cov_chol: &[f64],
61    priors: &[f64],
62    g: usize,
63) -> Vec<usize> {
64    let n = features.nrows();
65    let d = features.ncols();
66
67    (0..n)
68        .map(|i| {
69            let xi: Vec<f64> = (0..d).map(|j| features[(i, j)]).collect();
70            let mut best_class = 0;
71            let mut best_score = f64::NEG_INFINITY;
72            for c in 0..g {
73                let maha = mahalanobis_sq(&xi, &class_means[c], cov_chol, d);
74                let score = priors[c].max(1e-15).ln() - 0.5 * maha;
75                if score > best_score {
76                    best_score = score;
77                    best_class = c;
78                }
79            }
80            best_class
81        })
82        .collect()
83}
84
85/// FPC + LDA classification.
86///
87/// # Arguments
88/// * `data` — Functional data (n × m)
89/// * `y` — Class labels (length n)
90/// * `scalar_covariates` — Optional scalar covariates (n × p)
91/// * `ncomp` — Number of FPC components
92///
93/// # Errors
94///
95/// Returns [`FdarError::InvalidDimension`] if `data` has zero rows or `y.len() != n`.
96/// Returns [`FdarError::InvalidParameter`] if `ncomp` is zero.
97/// Returns [`FdarError::InvalidParameter`] if `y` contains fewer than 2 distinct classes.
98/// Returns [`FdarError::ComputationFailed`] if the SVD decomposition in FPCA fails.
99/// Returns [`FdarError::ComputationFailed`] if the pooled covariance Cholesky factorization fails.
100///
101/// # Examples
102///
103/// ```
104/// use fdars_core::matrix::FdMatrix;
105/// use fdars_core::classification::lda::fclassif_lda;
106///
107/// let data = FdMatrix::from_column_major(
108///     (0..100).map(|i| (i as f64 * 0.1).sin()).collect(),
109///     10, 10,
110/// ).unwrap();
111/// let y = vec![0, 0, 0, 0, 0, 1, 1, 1, 1, 1];
112/// let result = fclassif_lda(&data, &y, None, 3).unwrap();
113/// assert_eq!(result.predicted.len(), 10);
114/// assert_eq!(result.n_classes, 2);
115/// ```
116#[must_use = "expensive computation whose result should not be discarded"]
117pub fn fclassif_lda(
118    data: &FdMatrix,
119    y: &[usize],
120    scalar_covariates: Option<&FdMatrix>,
121    ncomp: usize,
122) -> Result<ClassifResult, FdarError> {
123    let n = data.nrows();
124    if n == 0 || y.len() != n {
125        return Err(FdarError::InvalidDimension {
126            parameter: "data/y",
127            expected: "n > 0 and y.len() == n".to_string(),
128            actual: format!("n={}, y.len()={}", n, y.len()),
129        });
130    }
131    if ncomp == 0 {
132        return Err(FdarError::InvalidParameter {
133            parameter: "ncomp",
134            message: "must be > 0".to_string(),
135        });
136    }
137
138    let (labels, g) = remap_labels(y);
139    if g < 2 {
140        return Err(FdarError::InvalidParameter {
141            parameter: "y",
142            message: format!("need at least 2 classes, got {g}"),
143        });
144    }
145
146    let (features, _mean, _rotation) = build_feature_matrix(data, scalar_covariates, ncomp)?;
147    let d = features.ncols();
148    let (class_means, cov, priors) = lda_params(&features, &labels, g);
149    let chol = cholesky_d(&cov, d)?;
150
151    let predicted = lda_predict(&features, &class_means, &chol, &priors, g);
152    let accuracy = compute_accuracy(&labels, &predicted);
153    let confusion = confusion_matrix(&labels, &predicted, g);
154
155    Ok(ClassifResult {
156        predicted,
157        probabilities: None,
158        accuracy,
159        confusion,
160        n_classes: g,
161        ncomp: features.ncols().min(ncomp),
162    })
163}