fdars_core/gmm/mod.rs
1//! Model-based functional clustering via Gaussian mixture models.
2//!
3//! Implements the fdaMocca approach (Arnqvist & Sjöstedt de Luna, 2023):
4//! project curves onto a basis, concatenate with scalar covariates, and fit
5//! a Gaussian mixture using EM.
6//!
7//! Key functions:
8//! - [`gmm_cluster`] — Main clustering entry point with automatic K selection
9//! - [`gmm_em`] — Single-K EM algorithm
10//! - [`predict_gmm`] — Assign new observations to clusters
11
12use crate::matrix::FdMatrix;
13
14pub mod cluster;
15pub mod covariance;
16pub mod em;
17pub mod init;
18#[cfg(test)]
19mod tests;
20
21// ---------------------------------------------------------------------------
22// Shared types
23// ---------------------------------------------------------------------------
24
/// Covariance structure for GMM components.
///
/// Selects how the covariance of each mixture component is parameterised.
/// The enum is `#[non_exhaustive]`, so `match` expressions in other crates
/// need a wildcard arm to remain compatible with variants added later.
///
/// In addition to `PartialEq`, the type implements `Eq` and `Hash`: it is a
/// fieldless enum, so equality is total and values can be used directly as
/// `HashMap` / `HashSet` keys or embedded in other `Eq + Hash` types.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub enum CovType {
    /// Full covariance matrix (d² parameters per component).
    Full,
    /// Diagonal covariance (d parameters per component).
    Diagonal,
}
34
35/// Result from a single GMM fit with fixed K.
36#[derive(Debug, Clone)]
37#[non_exhaustive]
38pub struct GmmResult {
39 /// Hard cluster assignments (length n)
40 pub cluster: Vec<usize>,
41 /// Posterior membership probabilities (n x K)
42 pub membership: FdMatrix,
43 /// Component means (K x d)
44 pub means: Vec<Vec<f64>>,
45 /// Component covariances: for Full, each is d×d flattened; for Diagonal, each is length d
46 pub covariances: Vec<Vec<f64>>,
47 /// Mixing proportions (length K)
48 pub weights: Vec<f64>,
49 /// Log-likelihood at convergence
50 pub log_likelihood: f64,
51 /// BIC value
52 pub bic: f64,
53 /// ICL value (BIC penalized by entropy)
54 pub icl: f64,
55 /// Number of EM iterations
56 pub iterations: usize,
57 /// Whether EM converged
58 pub converged: bool,
59 /// Number of clusters
60 pub k: usize,
61 /// Feature dimension (basis coefficients + covariates)
62 pub d: usize,
63}
64
65/// Result from automatic K selection.
66#[derive(Debug, Clone)]
67#[non_exhaustive]
68pub struct GmmClusterResult {
69 /// Best GMM result (by BIC or ICL)
70 pub best: GmmResult,
71 /// BIC values for each K tried
72 pub bic_values: Vec<(usize, f64)>,
73 /// ICL values for each K tried
74 pub icl_values: Vec<(usize, f64)>,
75}
76
77// Re-export all public items
78pub use cluster::{gmm_cluster, gmm_cluster_with_config, predict_gmm, GmmClusterConfig};
79pub use em::gmm_em;