// fdars_core/spm/mfpca.rs
//! Multivariate Functional Principal Component Analysis (MFPCA).
//!
//! Extends univariate FPCA to handle multiple functional variables
//! observed on potentially different grids. Variables are optionally
//! weighted by their inverse standard deviation before joint SVD.
//!
//! # Algorithm
//!
//! 1. Center each variable by its column means.
//! 2. Standardize each variable by its scale (sqrt of mean column variance)
//!    when `weighted = true`. This ensures variables on different scales
//!    contribute equally to the joint decomposition.
//! 3. Horizontally concatenate the centered/scaled variables into an
//!    n × (sum m_p) matrix and perform truncated SVD.
//! 4. Extract scores as U * S and eigenfunctions from V. Eigenfunctions
//!    are back-scaled by the per-variable scale factor when reconstructing,
//!    so that reconstruction returns values on the original measurement scale.
//!
//! # Projection accuracy
//!
//! The projection error ||X - X_hat||² is bounded by sum_{l > ncomp} lambda_l
//! (the sum of discarded eigenvalues). This provides an a priori error bound
//! for choosing ncomp: retain enough components so that the discarded
//! eigenvalue tail is acceptably small relative to the total variance.
//!
//! # Scale threshold
//!
//! Variables with scale < 1e-12 (relative to the maximum scale across all
//! variables) are treated as constant and receive unit weight (scale = 1.0).
//! This prevents numerical issues from near-zero denominators in the
//! standardization step.
//!
//! # References
//!
//! - Happ, C. & Greven, S. (2018). Multivariate functional principal component
//!   analysis for data observed on different (dimensional) domains. *Journal of
//!   the American Statistical Association*, 113(522), 649-659. The weighting
//!   scheme follows Section 2.2 (standardization by variable-specific scale)
//!   and the eigendecomposition is described in Section 2.3, Eq. 3-5.

use nalgebra::SVD;

use crate::error::FdarError;
use crate::matrix::FdMatrix;

/// Options controlling a multivariate FPCA decomposition.
#[derive(Debug, Clone, PartialEq)]
pub struct MfpcaConfig {
    /// How many principal components to extract (default 5).
    pub ncomp: usize,
    /// If true (the default), each variable is divided by its scale
    /// (its standard deviation) before the joint SVD so that variables
    /// measured on different scales contribute comparably.
    pub weighted: bool,
}

54impl Default for MfpcaConfig {
55    fn default() -> Self {
56        Self {
57            ncomp: 5,
58            weighted: true,
59        }
60    }
61}
62
63/// Result of multivariate FPCA.
64#[derive(Debug, Clone, PartialEq)]
65#[non_exhaustive]
66pub struct MfpcaResult {
67    /// Score matrix (n x ncomp).
68    pub scores: FdMatrix,
69    /// Eigenfunctions split by variable (one FdMatrix per variable, each m_p x ncomp).
70    pub eigenfunctions: Vec<FdMatrix>,
71    /// Eigenvalues (length ncomp): squared singular values divided by (n-1).
72    pub eigenvalues: Vec<f64>,
73    /// Per-variable mean functions.
74    pub means: Vec<Vec<f64>>,
75    /// Per-variable standard deviations (sqrt of mean column variance).
76    pub scales: Vec<f64>,
77    /// Grid sizes per variable.
78    pub grid_sizes: Vec<usize>,
79    /// Combined rotation matrix (sum(m_p) x ncomp) — internal use for projection.
80    pub(super) combined_rotation: FdMatrix,
81    /// Threshold below which a variable's scale is treated as 1.0 (avoids division by near-zero).
82    pub(super) scale_threshold: f64,
83}
84
85impl MfpcaResult {
86    /// Project new multivariate functional data onto the MFPCA score space.
87    ///
88    /// Each element of `new_data` is an n_new x m_p matrix for variable p.
89    ///
90    /// # Errors
91    ///
92    /// Returns [`FdarError::InvalidDimension`] if the number of variables or
93    /// their grid sizes do not match the training data.
94    pub fn project(&self, new_data: &[&FdMatrix]) -> Result<FdMatrix, FdarError> {
95        if new_data.len() != self.means.len() {
96            return Err(FdarError::InvalidDimension {
97                parameter: "new_data",
98                expected: format!("{} variables", self.means.len()),
99                actual: format!("{} variables", new_data.len()),
100            });
101        }
102
103        // Early check: total columns across all variables must match the
104        // combined rotation matrix rows (i.e., the sum of training grid sizes).
105        let total_input_cols: usize = new_data.iter().map(|v| v.ncols()).sum();
106        let expected_total: usize = self.grid_sizes.iter().sum();
107        if total_input_cols != expected_total {
108            return Err(FdarError::InvalidDimension {
109                parameter: "new_data",
110                expected: format!("{expected_total} total columns across all variables"),
111                actual: format!("{total_input_cols} total columns"),
112            });
113        }
114
115        let n_new = new_data[0].nrows();
116        let ncomp = self.scores.ncols();
117        let total_cols: usize = self.grid_sizes.iter().sum();
118
119        // Center, scale, and stack
120        let mut stacked = FdMatrix::zeros(n_new, total_cols);
121        let mut col_offset = 0;
122        for (p, &var) in new_data.iter().enumerate() {
123            let m_p = self.grid_sizes[p];
124            if var.ncols() != m_p {
125                return Err(FdarError::InvalidDimension {
126                    parameter: "new_data",
127                    expected: format!("{m_p} columns for variable {p}"),
128                    actual: format!("{} columns", var.ncols()),
129                });
130            }
131            if var.nrows() != n_new {
132                return Err(FdarError::InvalidDimension {
133                    parameter: "new_data",
134                    expected: format!("{n_new} rows for all variables"),
135                    actual: format!("{} rows for variable {p}", var.nrows()),
136                });
137            }
138            let scale = if self.scales[p] >= self.scale_threshold {
139                self.scales[p]
140            } else {
141                1.0
142            };
143            for i in 0..n_new {
144                for j in 0..m_p {
145                    let centered = var[(i, j)] - self.means[p][j];
146                    stacked[(i, col_offset + j)] = centered / scale;
147                }
148            }
149            col_offset += m_p;
150        }
151
152        // Multiply by combined rotation: scores = stacked * rotation
153        let mut scores = FdMatrix::zeros(n_new, ncomp);
154        for i in 0..n_new {
155            for k in 0..ncomp {
156                let mut sum = 0.0;
157                for j in 0..total_cols {
158                    sum += stacked[(i, j)] * self.combined_rotation[(j, k)];
159                }
160                scores[(i, k)] = sum;
161            }
162        }
163        Ok(scores)
164    }
165
166    /// Reconstruct multivariate functional data from scores.
167    ///
168    /// Returns one FdMatrix per variable (n x m_p).
169    ///
170    /// # Errors
171    ///
172    /// Returns [`FdarError::InvalidParameter`] if `ncomp` exceeds available components.
173    pub fn reconstruct(&self, scores: &FdMatrix, ncomp: usize) -> Result<Vec<FdMatrix>, FdarError> {
174        let max_comp = self.combined_rotation.ncols().min(scores.ncols());
175        if ncomp == 0 || ncomp > max_comp {
176            return Err(FdarError::InvalidParameter {
177                parameter: "ncomp",
178                message: format!("ncomp={ncomp} must be in 1..={max_comp}"),
179            });
180        }
181
182        let n = scores.nrows();
183        let total_cols: usize = self.grid_sizes.iter().sum();
184
185        // Reconstruct in combined space: stacked_recon = scores * rotation^T
186        let mut stacked = FdMatrix::zeros(n, total_cols);
187        for i in 0..n {
188            for j in 0..total_cols {
189                let mut val = 0.0;
190                for k in 0..ncomp {
191                    val += scores[(i, k)] * self.combined_rotation[(j, k)];
192                }
193                stacked[(i, j)] = val;
194            }
195        }
196
197        // Split by variable, un-scale, and add means
198        let mut result = Vec::with_capacity(self.means.len());
199        let mut col_offset = 0;
200        for (p, m_p) in self.grid_sizes.iter().enumerate() {
201            let scale = if self.scales[p] >= self.scale_threshold {
202                self.scales[p]
203            } else {
204                1.0
205            };
206            let mut var_mat = FdMatrix::zeros(n, *m_p);
207            for i in 0..n {
208                for j in 0..*m_p {
209                    var_mat[(i, j)] = stacked[(i, col_offset + j)] * scale + self.means[p][j];
210                }
211            }
212            col_offset += m_p;
213            result.push(var_mat);
214        }
215        Ok(result)
216    }
217}
218
219/// Perform multivariate FPCA on multiple functional variables.
220///
221/// # Arguments
222/// * `variables` - Slice of n x m_p matrices, one per functional variable
223/// * `config` - MFPCA configuration
224///
225/// # Example
226///
227/// ```
228/// use fdars_core::matrix::FdMatrix;
229/// use fdars_core::spm::mfpca::{mfpca, MfpcaConfig};
230/// let var1 = FdMatrix::from_column_major(vec![1.0,2.0,3.0,4.0,5.0,6.0], 3, 2).unwrap();
231/// let var2 = FdMatrix::from_column_major(vec![0.5,1.5,2.5,3.5,4.5,5.5], 3, 2).unwrap();
232/// let config = MfpcaConfig { ncomp: 2, weighted: true };
233/// let result = mfpca(&[&var1, &var2], &config).unwrap();
234/// assert_eq!(result.eigenvalues.len(), 2);
235/// assert!(result.eigenvalues[0] >= result.eigenvalues[1]);
236/// ```
237///
238/// # Errors
239///
240/// Returns [`FdarError::InvalidDimension`] if no variables are provided or
241/// variables have inconsistent row counts. Returns [`FdarError::ComputationFailed`]
242/// if the SVD fails.
243#[must_use = "expensive computation whose result should not be discarded"]
244pub fn mfpca(variables: &[&FdMatrix], config: &MfpcaConfig) -> Result<MfpcaResult, FdarError> {
245    if variables.is_empty() {
246        return Err(FdarError::InvalidDimension {
247            parameter: "variables",
248            expected: "at least 1 variable".to_string(),
249            actual: "0 variables".to_string(),
250        });
251    }
252
253    let n = variables[0].nrows();
254    if n < 2 {
255        return Err(FdarError::InvalidDimension {
256            parameter: "variables",
257            expected: "at least 2 observations".to_string(),
258            actual: format!("{n} observations"),
259        });
260    }
261
262    for (p, var) in variables.iter().enumerate() {
263        if var.nrows() != n {
264            return Err(FdarError::InvalidDimension {
265                parameter: "variables",
266                expected: format!("{n} rows for all variables"),
267                actual: format!("{} rows for variable {p}", var.nrows()),
268            });
269        }
270    }
271
272    let grid_sizes: Vec<usize> = variables.iter().map(|v| v.ncols()).collect();
273    let total_cols: usize = grid_sizes.iter().sum();
274    let ncomp = config.ncomp.min(n).min(total_cols);
275
276    // Step 1: Center each variable and compute scale
277    let mut means: Vec<Vec<f64>> = Vec::with_capacity(variables.len());
278    let mut scales: Vec<f64> = Vec::with_capacity(variables.len());
279
280    for var in variables.iter() {
281        let (_, m_p) = var.shape();
282        let mut mean = vec![0.0; m_p];
283        for j in 0..m_p {
284            let col = var.column(j);
285            mean[j] = col.iter().sum::<f64>() / n as f64;
286        }
287
288        // Scale = sqrt(mean of column variances)
289        let mut mean_var = 0.0;
290        for j in 0..m_p {
291            let col = var.column(j);
292            let var_j: f64 =
293                col.iter().map(|&v| (v - mean[j]).powi(2)).sum::<f64>() / (n as f64 - 1.0);
294            mean_var += var_j;
295        }
296        mean_var /= m_p as f64;
297        let scale = mean_var.sqrt();
298
299        means.push(mean);
300        scales.push(scale);
301    }
302
303    // Use relative threshold for scale: 1e-12 * max(scales).
304    // Variables with scale below this threshold contribute negligible variance
305    // and are effectively constant. Treating them as unscaled avoids division
306    // by near-zero values.
307    let max_scale = scales.iter().cloned().fold(0.0_f64, f64::max);
308    let scale_threshold = 1e-12 * max_scale.max(1e-15); // floor to avoid 0
309
310    // Step 2: Build stacked matrix (n x total_cols)
311    let mut stacked = FdMatrix::zeros(n, total_cols);
312    let mut col_offset = 0;
313    for (p, var) in variables.iter().enumerate() {
314        let m_p = grid_sizes[p];
315        let scale = if config.weighted && scales[p] > scale_threshold {
316            scales[p]
317        } else {
318            1.0
319        };
320        // Update scales to reflect actual scaling applied
321        if !config.weighted {
322            scales[p] = 1.0;
323        }
324        for i in 0..n {
325            for j in 0..m_p {
326                let centered = var[(i, j)] - means[p][j];
327                stacked[(i, col_offset + j)] = centered / scale;
328            }
329        }
330        col_offset += m_p;
331    }
332
333    // Step 3: SVD on stacked matrix
334    let svd = SVD::new(stacked.to_dmatrix(), true, true);
335
336    let v_t = svd
337        .v_t
338        .as_ref()
339        .ok_or_else(|| FdarError::ComputationFailed {
340            operation: "MFPCA SVD",
341            detail: "SVD failed to produce V_t matrix".to_string(),
342        })?;
343
344    let u = svd.u.as_ref().ok_or_else(|| FdarError::ComputationFailed {
345        operation: "MFPCA SVD",
346        detail: "SVD failed to produce U matrix".to_string(),
347    })?;
348
349    // Step 4: Extract components
350    let singular_values: Vec<f64> = svd.singular_values.iter().take(ncomp).copied().collect();
351
352    // Eigenvalues = sv^2 / (n - 1)
353    let eigenvalues: Vec<f64> = singular_values
354        .iter()
355        .map(|&sv| sv * sv / (n as f64 - 1.0))
356        .collect();
357
358    // Combined rotation: V[:, :ncomp] (total_cols x ncomp)
359    let mut combined_rotation = FdMatrix::zeros(total_cols, ncomp);
360    for k in 0..ncomp {
361        for j in 0..total_cols {
362            combined_rotation[(j, k)] = v_t[(k, j)];
363        }
364    }
365
366    // Scores: U * S (n x ncomp)
367    let mut scores = FdMatrix::zeros(n, ncomp);
368    for k in 0..ncomp {
369        let sv_k = singular_values[k];
370        for i in 0..n {
371            scores[(i, k)] = u[(i, k)] * sv_k;
372        }
373    }
374
375    // Step 5: Split eigenfunctions by variable
376    let mut eigenfunctions = Vec::with_capacity(variables.len());
377    let mut col_off = 0;
378    for m_p in &grid_sizes {
379        let mut ef = FdMatrix::zeros(*m_p, ncomp);
380        for k in 0..ncomp {
381            for j in 0..*m_p {
382                ef[(j, k)] = combined_rotation[(col_off + j, k)];
383            }
384        }
385        col_off += m_p;
386        eigenfunctions.push(ef);
387    }
388
389    Ok(MfpcaResult {
390        scores,
391        eigenfunctions,
392        eigenvalues,
393        means,
394        scales,
395        grid_sizes,
396        combined_rotation,
397        scale_threshold,
398    })
399}