Skip to main content

fdars_core/spm/
mfpca.rs

1//! Multivariate Functional Principal Component Analysis (MFPCA).
2//!
3//! Extends univariate FPCA to handle multiple functional variables
4//! observed on potentially different grids. Variables are optionally
5//! weighted by their inverse standard deviation before joint SVD.
6
7use crate::error::FdarError;
8use crate::matrix::FdMatrix;
9use nalgebra::SVD;
10
11/// Configuration for multivariate FPCA.
12#[derive(Debug, Clone, PartialEq)]
13pub struct MfpcaConfig {
14    /// Number of principal components to extract (default 5).
15    pub ncomp: usize,
16    /// Whether to weight each variable by 1/std_dev before SVD (default true).
17    pub weighted: bool,
18}
19
20impl Default for MfpcaConfig {
21    fn default() -> Self {
22        Self {
23            ncomp: 5,
24            weighted: true,
25        }
26    }
27}
28
29/// Result of multivariate FPCA.
30#[derive(Debug, Clone, PartialEq)]
31#[non_exhaustive]
32pub struct MfpcaResult {
33    /// Score matrix (n x ncomp).
34    pub scores: FdMatrix,
35    /// Eigenfunctions split by variable (one FdMatrix per variable, each m_p x ncomp).
36    pub eigenfunctions: Vec<FdMatrix>,
37    /// Eigenvalues (length ncomp): squared singular values divided by (n-1).
38    pub eigenvalues: Vec<f64>,
39    /// Per-variable mean functions.
40    pub means: Vec<Vec<f64>>,
41    /// Per-variable standard deviations (sqrt of mean column variance).
42    pub scales: Vec<f64>,
43    /// Grid sizes per variable.
44    pub grid_sizes: Vec<usize>,
45    /// Combined rotation matrix (sum(m_p) x ncomp) — internal use for projection.
46    pub(super) combined_rotation: FdMatrix,
47}
48
49impl MfpcaResult {
50    /// Project new multivariate functional data onto the MFPCA score space.
51    ///
52    /// Each element of `new_data` is an n_new x m_p matrix for variable p.
53    ///
54    /// # Errors
55    ///
56    /// Returns [`FdarError::InvalidDimension`] if the number of variables or
57    /// their grid sizes do not match the training data.
58    pub fn project(&self, new_data: &[&FdMatrix]) -> Result<FdMatrix, FdarError> {
59        if new_data.len() != self.means.len() {
60            return Err(FdarError::InvalidDimension {
61                parameter: "new_data",
62                expected: format!("{} variables", self.means.len()),
63                actual: format!("{} variables", new_data.len()),
64            });
65        }
66
67        let n_new = new_data[0].nrows();
68        let ncomp = self.scores.ncols();
69        let total_cols: usize = self.grid_sizes.iter().sum();
70
71        // Center, scale, and stack
72        let mut stacked = FdMatrix::zeros(n_new, total_cols);
73        let mut col_offset = 0;
74        for (p, &var) in new_data.iter().enumerate() {
75            let m_p = self.grid_sizes[p];
76            if var.ncols() != m_p {
77                return Err(FdarError::InvalidDimension {
78                    parameter: "new_data",
79                    expected: format!("{m_p} columns for variable {p}"),
80                    actual: format!("{} columns", var.ncols()),
81                });
82            }
83            if var.nrows() != n_new {
84                return Err(FdarError::InvalidDimension {
85                    parameter: "new_data",
86                    expected: format!("{n_new} rows for all variables"),
87                    actual: format!("{} rows for variable {p}", var.nrows()),
88                });
89            }
90            let scale = if self.scales[p] > 1e-15 {
91                self.scales[p]
92            } else {
93                1.0
94            };
95            for i in 0..n_new {
96                for j in 0..m_p {
97                    let centered = var[(i, j)] - self.means[p][j];
98                    stacked[(i, col_offset + j)] = centered / scale;
99                }
100            }
101            col_offset += m_p;
102        }
103
104        // Multiply by combined rotation: scores = stacked * rotation
105        let mut scores = FdMatrix::zeros(n_new, ncomp);
106        for i in 0..n_new {
107            for k in 0..ncomp {
108                let mut sum = 0.0;
109                for j in 0..total_cols {
110                    sum += stacked[(i, j)] * self.combined_rotation[(j, k)];
111                }
112                scores[(i, k)] = sum;
113            }
114        }
115        Ok(scores)
116    }
117
118    /// Reconstruct multivariate functional data from scores.
119    ///
120    /// Returns one FdMatrix per variable (n x m_p).
121    ///
122    /// # Errors
123    ///
124    /// Returns [`FdarError::InvalidParameter`] if `ncomp` exceeds available components.
125    pub fn reconstruct(&self, scores: &FdMatrix, ncomp: usize) -> Result<Vec<FdMatrix>, FdarError> {
126        let max_comp = self.combined_rotation.ncols().min(scores.ncols());
127        if ncomp == 0 || ncomp > max_comp {
128            return Err(FdarError::InvalidParameter {
129                parameter: "ncomp",
130                message: format!("ncomp={ncomp} must be in 1..={max_comp}"),
131            });
132        }
133
134        let n = scores.nrows();
135        let total_cols: usize = self.grid_sizes.iter().sum();
136
137        // Reconstruct in combined space: stacked_recon = scores * rotation^T
138        let mut stacked = FdMatrix::zeros(n, total_cols);
139        for i in 0..n {
140            for j in 0..total_cols {
141                let mut val = 0.0;
142                for k in 0..ncomp {
143                    val += scores[(i, k)] * self.combined_rotation[(j, k)];
144                }
145                stacked[(i, j)] = val;
146            }
147        }
148
149        // Split by variable, un-scale, and add means
150        let mut result = Vec::with_capacity(self.means.len());
151        let mut col_offset = 0;
152        for (p, m_p) in self.grid_sizes.iter().enumerate() {
153            let scale = if self.scales[p] > 1e-15 {
154                self.scales[p]
155            } else {
156                1.0
157            };
158            let mut var_mat = FdMatrix::zeros(n, *m_p);
159            for i in 0..n {
160                for j in 0..*m_p {
161                    var_mat[(i, j)] = stacked[(i, col_offset + j)] * scale + self.means[p][j];
162                }
163            }
164            col_offset += m_p;
165            result.push(var_mat);
166        }
167        Ok(result)
168    }
169}
170
171/// Perform multivariate FPCA on multiple functional variables.
172///
173/// # Arguments
174/// * `variables` - Slice of n x m_p matrices, one per functional variable
175/// * `config` - MFPCA configuration
176///
177/// # Errors
178///
179/// Returns [`FdarError::InvalidDimension`] if no variables are provided or
180/// variables have inconsistent row counts. Returns [`FdarError::ComputationFailed`]
181/// if the SVD fails.
182#[must_use = "expensive computation whose result should not be discarded"]
183pub fn mfpca(variables: &[&FdMatrix], config: &MfpcaConfig) -> Result<MfpcaResult, FdarError> {
184    if variables.is_empty() {
185        return Err(FdarError::InvalidDimension {
186            parameter: "variables",
187            expected: "at least 1 variable".to_string(),
188            actual: "0 variables".to_string(),
189        });
190    }
191
192    let n = variables[0].nrows();
193    if n < 2 {
194        return Err(FdarError::InvalidDimension {
195            parameter: "variables",
196            expected: "at least 2 observations".to_string(),
197            actual: format!("{n} observations"),
198        });
199    }
200
201    for (p, var) in variables.iter().enumerate() {
202        if var.nrows() != n {
203            return Err(FdarError::InvalidDimension {
204                parameter: "variables",
205                expected: format!("{n} rows for all variables"),
206                actual: format!("{} rows for variable {p}", var.nrows()),
207            });
208        }
209    }
210
211    let grid_sizes: Vec<usize> = variables.iter().map(|v| v.ncols()).collect();
212    let total_cols: usize = grid_sizes.iter().sum();
213    let ncomp = config.ncomp.min(n).min(total_cols);
214
215    // Step 1: Center each variable and compute scale
216    let mut means: Vec<Vec<f64>> = Vec::with_capacity(variables.len());
217    let mut scales: Vec<f64> = Vec::with_capacity(variables.len());
218
219    for var in variables.iter() {
220        let (_, m_p) = var.shape();
221        let mut mean = vec![0.0; m_p];
222        for j in 0..m_p {
223            let col = var.column(j);
224            mean[j] = col.iter().sum::<f64>() / n as f64;
225        }
226
227        // Scale = sqrt(mean of column variances)
228        let mut mean_var = 0.0;
229        for j in 0..m_p {
230            let col = var.column(j);
231            let var_j: f64 =
232                col.iter().map(|&v| (v - mean[j]).powi(2)).sum::<f64>() / (n as f64 - 1.0);
233            mean_var += var_j;
234        }
235        mean_var /= m_p as f64;
236        let scale = mean_var.sqrt();
237
238        means.push(mean);
239        scales.push(scale);
240    }
241
242    // Step 2: Build stacked matrix (n x total_cols)
243    let mut stacked = FdMatrix::zeros(n, total_cols);
244    let mut col_offset = 0;
245    for (p, var) in variables.iter().enumerate() {
246        let m_p = grid_sizes[p];
247        let scale = if config.weighted && scales[p] > 1e-15 {
248            scales[p]
249        } else {
250            1.0
251        };
252        // Update scales to reflect actual scaling applied
253        if !config.weighted {
254            scales[p] = 1.0;
255        }
256        for i in 0..n {
257            for j in 0..m_p {
258                let centered = var[(i, j)] - means[p][j];
259                stacked[(i, col_offset + j)] = centered / scale;
260            }
261        }
262        col_offset += m_p;
263    }
264
265    // Step 3: SVD on stacked matrix
266    let svd = SVD::new(stacked.to_dmatrix(), true, true);
267
268    let v_t = svd
269        .v_t
270        .as_ref()
271        .ok_or_else(|| FdarError::ComputationFailed {
272            operation: "MFPCA SVD",
273            detail: "SVD failed to produce V_t matrix".to_string(),
274        })?;
275
276    let u = svd.u.as_ref().ok_or_else(|| FdarError::ComputationFailed {
277        operation: "MFPCA SVD",
278        detail: "SVD failed to produce U matrix".to_string(),
279    })?;
280
281    // Step 4: Extract components
282    let singular_values: Vec<f64> = svd.singular_values.iter().take(ncomp).copied().collect();
283
284    // Eigenvalues = sv^2 / (n - 1)
285    let eigenvalues: Vec<f64> = singular_values
286        .iter()
287        .map(|&sv| sv * sv / (n as f64 - 1.0))
288        .collect();
289
290    // Combined rotation: V[:, :ncomp] (total_cols x ncomp)
291    let mut combined_rotation = FdMatrix::zeros(total_cols, ncomp);
292    for k in 0..ncomp {
293        for j in 0..total_cols {
294            combined_rotation[(j, k)] = v_t[(k, j)];
295        }
296    }
297
298    // Scores: U * S (n x ncomp)
299    let mut scores = FdMatrix::zeros(n, ncomp);
300    for k in 0..ncomp {
301        let sv_k = singular_values[k];
302        for i in 0..n {
303            scores[(i, k)] = u[(i, k)] * sv_k;
304        }
305    }
306
307    // Step 5: Split eigenfunctions by variable
308    let mut eigenfunctions = Vec::with_capacity(variables.len());
309    let mut col_off = 0;
310    for m_p in &grid_sizes {
311        let mut ef = FdMatrix::zeros(*m_p, ncomp);
312        for k in 0..ncomp {
313            for j in 0..*m_p {
314                ef[(j, k)] = combined_rotation[(col_off + j, k)];
315            }
316        }
317        col_off += m_p;
318        eigenfunctions.push(ef);
319    }
320
321    Ok(MfpcaResult {
322        scores,
323        eigenfunctions,
324        eigenvalues,
325        means,
326        scales,
327        grid_sizes,
328        combined_rotation,
329    })
330}