Skip to main content

fdars_core/gmm/
mod.rs

1//! Model-based functional clustering via Gaussian mixture models.
2//!
3//! Implements the fdaMocca approach (Arnqvist & Sjöstedt de Luna, 2023):
4//! project curves onto a basis, concatenate with scalar covariates, and fit
5//! a Gaussian mixture using EM.
6//!
7//! Key functions:
8//! - [`gmm_cluster`] — Main clustering entry point with automatic K selection
9//! - [`gmm_em`] — Single-K EM algorithm
10//! - [`predict_gmm`] — Assign new observations to clusters
11
12use crate::matrix::FdMatrix;
13
14pub mod cluster;
15pub mod covariance;
16pub mod em;
17pub mod init;
18#[cfg(test)]
19mod tests;
20
21// ---------------------------------------------------------------------------
22// Shared types
23// ---------------------------------------------------------------------------
24
/// Covariance structure shared by the components of a Gaussian mixture.
///
/// The variant chosen determines how each entry of a fitted model's
/// per-component covariance list is laid out: a flattened d×d matrix for
/// [`CovType::Full`], or a length-d vector for [`CovType::Diagonal`].
///
/// The enum is fieldless, so equality is total: it derives `Eq` and `Hash`
/// in addition to `PartialEq`, which lets it be used as a map/set key.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub enum CovType {
    /// Full covariance matrix (d² parameters per component)
    Full,
    /// Diagonal covariance (d parameters per component)
    Diagonal,
}
34
35/// Result from a single GMM fit with fixed K.
36#[derive(Debug, Clone)]
37#[non_exhaustive]
38pub struct GmmResult {
39    /// Hard cluster assignments (length n)
40    pub cluster: Vec<usize>,
41    /// Posterior membership probabilities (n x K)
42    pub membership: FdMatrix,
43    /// Component means (K x d)
44    pub means: Vec<Vec<f64>>,
45    /// Component covariances: for Full, each is d×d flattened; for Diagonal, each is length d
46    pub covariances: Vec<Vec<f64>>,
47    /// Mixing proportions (length K)
48    pub weights: Vec<f64>,
49    /// Log-likelihood at convergence
50    pub log_likelihood: f64,
51    /// BIC value
52    pub bic: f64,
53    /// ICL value (BIC penalized by entropy)
54    pub icl: f64,
55    /// Number of EM iterations
56    pub iterations: usize,
57    /// Whether EM converged
58    pub converged: bool,
59    /// Number of clusters
60    pub k: usize,
61    /// Feature dimension (basis coefficients + covariates)
62    pub d: usize,
63}
64
65/// Result from automatic K selection.
66#[derive(Debug, Clone)]
67#[non_exhaustive]
68pub struct GmmClusterResult {
69    /// Best GMM result (by BIC or ICL)
70    pub best: GmmResult,
71    /// BIC values for each K tried
72    pub bic_values: Vec<(usize, f64)>,
73    /// ICL values for each K tried
74    pub icl_values: Vec<(usize, f64)>,
75}
76
77// Re-export all public items
78pub use cluster::{gmm_cluster, gmm_cluster_with_config, predict_gmm, GmmClusterConfig};
79pub use em::gmm_em;