Skip to main content

fdars_core/spm/
mfpca.rs

1//! Multivariate Functional Principal Component Analysis (MFPCA).
2//!
3//! Extends univariate FPCA to handle multiple functional variables
4//! observed on potentially different grids. Variables are optionally
5//! weighted by their inverse standard deviation before joint SVD.
6//!
7//! # Algorithm
8//!
9//! 1. Center each variable by its column means.
10//! 2. Standardize each variable by its scale (sqrt of mean column variance)
11//!    when `weighted = true`. This ensures variables on different scales
12//!    contribute equally to the joint decomposition.
13//! 3. Horizontally concatenate the centered/scaled variables into an
14//!    n × (sum m_p) matrix and perform truncated SVD.
15//! 4. Extract scores as U * S and eigenfunctions from V. Eigenfunctions
16//!    are back-scaled by the per-variable scale factor when reconstructing,
17//!    so that reconstruction returns values on the original measurement scale.
18//!
19//! # Projection accuracy
20//!
21//! The projection error ||X - X_hat||² is bounded by sum_{l > ncomp} lambda_l
22//! (the sum of discarded eigenvalues). This provides an a priori error bound
23//! for choosing ncomp: retain enough components so that the discarded
24//! eigenvalue tail is acceptably small relative to the total variance.
25//!
26//! # Scale threshold
27//!
28//! Variables with scale < 1e-12 (relative to the maximum scale across all
29//! variables) are treated as constant and receive unit weight (scale = 1.0).
30//! This prevents numerical issues from near-zero denominators in the
31//! standardization step.
32//!
33//! # References
34//!
35//! - Happ, C. & Greven, S. (2018). Multivariate functional principal component
36//!   analysis for data observed on different (dimensional) domains. *Journal of
37//!   the American Statistical Association*, 113(522), 649-659. The weighting
38//!   scheme follows Section 2.2 (standardization by variable-specific scale)
39//!   and the eigendecomposition is described in Section 2.3, Eq. 3-5.
40
41use crate::error::FdarError;
42use crate::matrix::FdMatrix;
43use nalgebra::SVD;
44
/// Configuration for multivariate FPCA.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct MfpcaConfig {
    /// Number of principal components to extract (default 5).
    pub ncomp: usize,
    /// Whether to weight each variable by 1/std_dev before SVD (default true).
    pub weighted: bool,
}

impl Default for MfpcaConfig {
    /// Defaults: 5 components, variable weighting enabled.
    fn default() -> Self {
        MfpcaConfig { ncomp: 5, weighted: true }
    }
}
63
/// Result of multivariate FPCA.
///
/// Produced by [`mfpca`]; use [`MfpcaResult::project`] to score new data and
/// [`MfpcaResult::reconstruct`] to map scores back to the measurement scale.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[non_exhaustive]
pub struct MfpcaResult {
    /// Score matrix (n x ncomp): U * S from the joint SVD of the stacked data.
    pub scores: FdMatrix,
    /// Eigenfunctions split by variable (one FdMatrix per variable, each m_p x ncomp).
    pub eigenfunctions: Vec<FdMatrix>,
    /// Eigenvalues (length ncomp): squared singular values divided by (n-1).
    pub eigenvalues: Vec<f64>,
    /// Per-variable mean functions (column means of the training data).
    pub means: Vec<Vec<f64>>,
    /// Per-variable scales: sqrt of mean column variance when `weighted = true`;
    /// every entry is overwritten to 1.0 when `weighted = false`.
    pub scales: Vec<f64>,
    /// Grid sizes per variable (m_p, the column count of variable p).
    pub grid_sizes: Vec<usize>,
    /// Combined rotation matrix (sum(m_p) x ncomp) — internal use for projection.
    pub(super) combined_rotation: FdMatrix,
    /// Threshold below which a variable's scale is treated as 1.0 (avoids division
    /// by near-zero); computed during training as 1e-12 * max scale across variables.
    pub(super) scale_threshold: f64,
}
86
87impl MfpcaResult {
88    /// Project new multivariate functional data onto the MFPCA score space.
89    ///
90    /// Each element of `new_data` is an n_new x m_p matrix for variable p.
91    ///
92    /// # Errors
93    ///
94    /// Returns [`FdarError::InvalidDimension`] if the number of variables or
95    /// their grid sizes do not match the training data.
96    pub fn project(&self, new_data: &[&FdMatrix]) -> Result<FdMatrix, FdarError> {
97        if new_data.len() != self.means.len() {
98            return Err(FdarError::InvalidDimension {
99                parameter: "new_data",
100                expected: format!("{} variables", self.means.len()),
101                actual: format!("{} variables", new_data.len()),
102            });
103        }
104
105        // Early check: total columns across all variables must match the
106        // combined rotation matrix rows (i.e., the sum of training grid sizes).
107        let total_input_cols: usize = new_data.iter().map(|v| v.ncols()).sum();
108        let expected_total: usize = self.grid_sizes.iter().sum();
109        if total_input_cols != expected_total {
110            return Err(FdarError::InvalidDimension {
111                parameter: "new_data",
112                expected: format!("{expected_total} total columns across all variables"),
113                actual: format!("{total_input_cols} total columns"),
114            });
115        }
116
117        let n_new = new_data[0].nrows();
118        let ncomp = self.scores.ncols();
119        let total_cols: usize = self.grid_sizes.iter().sum();
120
121        // Center, scale, and stack
122        let mut stacked = FdMatrix::zeros(n_new, total_cols);
123        let mut col_offset = 0;
124        for (p, &var) in new_data.iter().enumerate() {
125            let m_p = self.grid_sizes[p];
126            if var.ncols() != m_p {
127                return Err(FdarError::InvalidDimension {
128                    parameter: "new_data",
129                    expected: format!("{m_p} columns for variable {p}"),
130                    actual: format!("{} columns", var.ncols()),
131                });
132            }
133            if var.nrows() != n_new {
134                return Err(FdarError::InvalidDimension {
135                    parameter: "new_data",
136                    expected: format!("{n_new} rows for all variables"),
137                    actual: format!("{} rows for variable {p}", var.nrows()),
138                });
139            }
140            let scale = if self.scales[p] >= self.scale_threshold {
141                self.scales[p]
142            } else {
143                1.0
144            };
145            for i in 0..n_new {
146                for j in 0..m_p {
147                    let centered = var[(i, j)] - self.means[p][j];
148                    stacked[(i, col_offset + j)] = centered / scale;
149                }
150            }
151            col_offset += m_p;
152        }
153
154        // Multiply by combined rotation: scores = stacked * rotation
155        let mut scores = FdMatrix::zeros(n_new, ncomp);
156        for i in 0..n_new {
157            for k in 0..ncomp {
158                let mut sum = 0.0;
159                for j in 0..total_cols {
160                    sum += stacked[(i, j)] * self.combined_rotation[(j, k)];
161                }
162                scores[(i, k)] = sum;
163            }
164        }
165        Ok(scores)
166    }
167
168    /// Reconstruct multivariate functional data from scores.
169    ///
170    /// Returns one FdMatrix per variable (n x m_p).
171    ///
172    /// # Errors
173    ///
174    /// Returns [`FdarError::InvalidParameter`] if `ncomp` exceeds available components.
175    pub fn reconstruct(&self, scores: &FdMatrix, ncomp: usize) -> Result<Vec<FdMatrix>, FdarError> {
176        let max_comp = self.combined_rotation.ncols().min(scores.ncols());
177        if ncomp == 0 || ncomp > max_comp {
178            return Err(FdarError::InvalidParameter {
179                parameter: "ncomp",
180                message: format!("ncomp={ncomp} must be in 1..={max_comp}"),
181            });
182        }
183
184        let n = scores.nrows();
185        let total_cols: usize = self.grid_sizes.iter().sum();
186
187        // Reconstruct in combined space: stacked_recon = scores * rotation^T
188        let mut stacked = FdMatrix::zeros(n, total_cols);
189        for i in 0..n {
190            for j in 0..total_cols {
191                let mut val = 0.0;
192                for k in 0..ncomp {
193                    val += scores[(i, k)] * self.combined_rotation[(j, k)];
194                }
195                stacked[(i, j)] = val;
196            }
197        }
198
199        // Split by variable, un-scale, and add means
200        let mut result = Vec::with_capacity(self.means.len());
201        let mut col_offset = 0;
202        for (p, m_p) in self.grid_sizes.iter().enumerate() {
203            let scale = if self.scales[p] >= self.scale_threshold {
204                self.scales[p]
205            } else {
206                1.0
207            };
208            let mut var_mat = FdMatrix::zeros(n, *m_p);
209            for i in 0..n {
210                for j in 0..*m_p {
211                    var_mat[(i, j)] = stacked[(i, col_offset + j)] * scale + self.means[p][j];
212                }
213            }
214            col_offset += m_p;
215            result.push(var_mat);
216        }
217        Ok(result)
218    }
219}
220
221/// Perform multivariate FPCA on multiple functional variables.
222///
223/// # Arguments
224/// * `variables` - Slice of n x m_p matrices, one per functional variable
225/// * `config` - MFPCA configuration
226///
227/// # Example
228///
229/// ```
230/// use fdars_core::matrix::FdMatrix;
231/// use fdars_core::spm::mfpca::{mfpca, MfpcaConfig};
232/// let var1 = FdMatrix::from_column_major(vec![1.0,2.0,3.0,4.0,5.0,6.0], 3, 2).unwrap();
233/// let var2 = FdMatrix::from_column_major(vec![0.5,1.5,2.5,3.5,4.5,5.5], 3, 2).unwrap();
234/// let config = MfpcaConfig { ncomp: 2, weighted: true };
235/// let result = mfpca(&[&var1, &var2], &config).unwrap();
236/// assert_eq!(result.eigenvalues.len(), 2);
237/// assert!(result.eigenvalues[0] >= result.eigenvalues[1]);
238/// ```
239///
240/// # Errors
241///
242/// Returns [`FdarError::InvalidDimension`] if no variables are provided or
243/// variables have inconsistent row counts. Returns [`FdarError::ComputationFailed`]
244/// if the SVD fails.
245#[must_use = "expensive computation whose result should not be discarded"]
246pub fn mfpca(variables: &[&FdMatrix], config: &MfpcaConfig) -> Result<MfpcaResult, FdarError> {
247    if variables.is_empty() {
248        return Err(FdarError::InvalidDimension {
249            parameter: "variables",
250            expected: "at least 1 variable".to_string(),
251            actual: "0 variables".to_string(),
252        });
253    }
254
255    let n = variables[0].nrows();
256    if n < 2 {
257        return Err(FdarError::InvalidDimension {
258            parameter: "variables",
259            expected: "at least 2 observations".to_string(),
260            actual: format!("{n} observations"),
261        });
262    }
263
264    for (p, var) in variables.iter().enumerate() {
265        if var.nrows() != n {
266            return Err(FdarError::InvalidDimension {
267                parameter: "variables",
268                expected: format!("{n} rows for all variables"),
269                actual: format!("{} rows for variable {p}", var.nrows()),
270            });
271        }
272    }
273
274    let grid_sizes: Vec<usize> = variables.iter().map(|v| v.ncols()).collect();
275    let total_cols: usize = grid_sizes.iter().sum();
276    let ncomp = config.ncomp.min(n).min(total_cols);
277
278    // Step 1: Center each variable and compute scale
279    let mut means: Vec<Vec<f64>> = Vec::with_capacity(variables.len());
280    let mut scales: Vec<f64> = Vec::with_capacity(variables.len());
281
282    for var in variables.iter() {
283        let (_, m_p) = var.shape();
284        let mut mean = vec![0.0; m_p];
285        for j in 0..m_p {
286            let col = var.column(j);
287            mean[j] = col.iter().sum::<f64>() / n as f64;
288        }
289
290        // Scale = sqrt(mean of column variances)
291        let mut mean_var = 0.0;
292        for j in 0..m_p {
293            let col = var.column(j);
294            let var_j: f64 =
295                col.iter().map(|&v| (v - mean[j]).powi(2)).sum::<f64>() / (n as f64 - 1.0);
296            mean_var += var_j;
297        }
298        mean_var /= m_p as f64;
299        let scale = mean_var.sqrt();
300
301        means.push(mean);
302        scales.push(scale);
303    }
304
305    // Use relative threshold for scale: 1e-12 * max(scales).
306    // Variables with scale below this threshold contribute negligible variance
307    // and are effectively constant. Treating them as unscaled avoids division
308    // by near-zero values.
309    let max_scale = scales.iter().cloned().fold(0.0_f64, f64::max);
310    let scale_threshold = 1e-12 * max_scale.max(1e-15); // floor to avoid 0
311
312    // Step 2: Build stacked matrix (n x total_cols)
313    let mut stacked = FdMatrix::zeros(n, total_cols);
314    let mut col_offset = 0;
315    for (p, var) in variables.iter().enumerate() {
316        let m_p = grid_sizes[p];
317        let scale = if config.weighted && scales[p] > scale_threshold {
318            scales[p]
319        } else {
320            1.0
321        };
322        // Update scales to reflect actual scaling applied
323        if !config.weighted {
324            scales[p] = 1.0;
325        }
326        for i in 0..n {
327            for j in 0..m_p {
328                let centered = var[(i, j)] - means[p][j];
329                stacked[(i, col_offset + j)] = centered / scale;
330            }
331        }
332        col_offset += m_p;
333    }
334
335    // Step 3: SVD on stacked matrix
336    let svd = SVD::new(stacked.to_dmatrix(), true, true);
337
338    let v_t = svd
339        .v_t
340        .as_ref()
341        .ok_or_else(|| FdarError::ComputationFailed {
342            operation: "MFPCA SVD",
343            detail: "SVD failed to produce V_t matrix".to_string(),
344        })?;
345
346    let u = svd.u.as_ref().ok_or_else(|| FdarError::ComputationFailed {
347        operation: "MFPCA SVD",
348        detail: "SVD failed to produce U matrix".to_string(),
349    })?;
350
351    // Step 4: Extract components
352    let singular_values: Vec<f64> = svd.singular_values.iter().take(ncomp).copied().collect();
353
354    // Eigenvalues = sv^2 / (n - 1)
355    let eigenvalues: Vec<f64> = singular_values
356        .iter()
357        .map(|&sv| sv * sv / (n as f64 - 1.0))
358        .collect();
359
360    // Combined rotation: V[:, :ncomp] (total_cols x ncomp)
361    let mut combined_rotation = FdMatrix::zeros(total_cols, ncomp);
362    for k in 0..ncomp {
363        for j in 0..total_cols {
364            combined_rotation[(j, k)] = v_t[(k, j)];
365        }
366    }
367
368    // Scores: U * S (n x ncomp)
369    let mut scores = FdMatrix::zeros(n, ncomp);
370    for k in 0..ncomp {
371        let sv_k = singular_values[k];
372        for i in 0..n {
373            scores[(i, k)] = u[(i, k)] * sv_k;
374        }
375    }
376
377    // Step 5: Split eigenfunctions by variable
378    let mut eigenfunctions = Vec::with_capacity(variables.len());
379    let mut col_off = 0;
380    for m_p in &grid_sizes {
381        let mut ef = FdMatrix::zeros(*m_p, ncomp);
382        for k in 0..ncomp {
383            for j in 0..*m_p {
384                ef[(j, k)] = combined_rotation[(col_off + j, k)];
385            }
386        }
387        col_off += m_p;
388        eigenfunctions.push(ef);
389    }
390
391    Ok(MfpcaResult {
392        scores,
393        eigenfunctions,
394        eigenvalues,
395        means,
396        scales,
397        grid_sizes,
398        combined_rotation,
399        scale_threshold,
400    })
401}