use crate::error::FdarError;
use crate::matrix::FdMatrix;
use nalgebra::SVD;
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct MfpcaConfig {
pub ncomp: usize,
pub weighted: bool,
}
impl Default for MfpcaConfig {
fn default() -> Self {
Self {
ncomp: 5,
weighted: true,
}
}
}
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[non_exhaustive]
pub struct MfpcaResult {
pub scores: FdMatrix,
pub eigenfunctions: Vec<FdMatrix>,
pub eigenvalues: Vec<f64>,
pub means: Vec<Vec<f64>>,
pub scales: Vec<f64>,
pub grid_sizes: Vec<usize>,
pub(super) combined_rotation: FdMatrix,
pub(super) scale_threshold: f64,
}
impl MfpcaResult {
pub fn project(&self, new_data: &[&FdMatrix]) -> Result<FdMatrix, FdarError> {
if new_data.len() != self.means.len() {
return Err(FdarError::InvalidDimension {
parameter: "new_data",
expected: format!("{} variables", self.means.len()),
actual: format!("{} variables", new_data.len()),
});
}
let total_input_cols: usize = new_data.iter().map(|v| v.ncols()).sum();
let expected_total: usize = self.grid_sizes.iter().sum();
if total_input_cols != expected_total {
return Err(FdarError::InvalidDimension {
parameter: "new_data",
expected: format!("{expected_total} total columns across all variables"),
actual: format!("{total_input_cols} total columns"),
});
}
let n_new = new_data[0].nrows();
let ncomp = self.scores.ncols();
let total_cols: usize = self.grid_sizes.iter().sum();
let mut stacked = FdMatrix::zeros(n_new, total_cols);
let mut col_offset = 0;
for (p, &var) in new_data.iter().enumerate() {
let m_p = self.grid_sizes[p];
if var.ncols() != m_p {
return Err(FdarError::InvalidDimension {
parameter: "new_data",
expected: format!("{m_p} columns for variable {p}"),
actual: format!("{} columns", var.ncols()),
});
}
if var.nrows() != n_new {
return Err(FdarError::InvalidDimension {
parameter: "new_data",
expected: format!("{n_new} rows for all variables"),
actual: format!("{} rows for variable {p}", var.nrows()),
});
}
let scale = if self.scales[p] >= self.scale_threshold {
self.scales[p]
} else {
1.0
};
for i in 0..n_new {
for j in 0..m_p {
let centered = var[(i, j)] - self.means[p][j];
stacked[(i, col_offset + j)] = centered / scale;
}
}
col_offset += m_p;
}
let mut scores = FdMatrix::zeros(n_new, ncomp);
for i in 0..n_new {
for k in 0..ncomp {
let mut sum = 0.0;
for j in 0..total_cols {
sum += stacked[(i, j)] * self.combined_rotation[(j, k)];
}
scores[(i, k)] = sum;
}
}
Ok(scores)
}
pub fn reconstruct(&self, scores: &FdMatrix, ncomp: usize) -> Result<Vec<FdMatrix>, FdarError> {
let max_comp = self.combined_rotation.ncols().min(scores.ncols());
if ncomp == 0 || ncomp > max_comp {
return Err(FdarError::InvalidParameter {
parameter: "ncomp",
message: format!("ncomp={ncomp} must be in 1..={max_comp}"),
});
}
let n = scores.nrows();
let total_cols: usize = self.grid_sizes.iter().sum();
let mut stacked = FdMatrix::zeros(n, total_cols);
for i in 0..n {
for j in 0..total_cols {
let mut val = 0.0;
for k in 0..ncomp {
val += scores[(i, k)] * self.combined_rotation[(j, k)];
}
stacked[(i, j)] = val;
}
}
let mut result = Vec::with_capacity(self.means.len());
let mut col_offset = 0;
for (p, m_p) in self.grid_sizes.iter().enumerate() {
let scale = if self.scales[p] >= self.scale_threshold {
self.scales[p]
} else {
1.0
};
let mut var_mat = FdMatrix::zeros(n, *m_p);
for i in 0..n {
for j in 0..*m_p {
var_mat[(i, j)] = stacked[(i, col_offset + j)] * scale + self.means[p][j];
}
}
col_offset += m_p;
result.push(var_mat);
}
Ok(result)
}
}
#[must_use = "expensive computation whose result should not be discarded"]
pub fn mfpca(variables: &[&FdMatrix], config: &MfpcaConfig) -> Result<MfpcaResult, FdarError> {
if variables.is_empty() {
return Err(FdarError::InvalidDimension {
parameter: "variables",
expected: "at least 1 variable".to_string(),
actual: "0 variables".to_string(),
});
}
let n = variables[0].nrows();
if n < 2 {
return Err(FdarError::InvalidDimension {
parameter: "variables",
expected: "at least 2 observations".to_string(),
actual: format!("{n} observations"),
});
}
for (p, var) in variables.iter().enumerate() {
if var.nrows() != n {
return Err(FdarError::InvalidDimension {
parameter: "variables",
expected: format!("{n} rows for all variables"),
actual: format!("{} rows for variable {p}", var.nrows()),
});
}
}
let grid_sizes: Vec<usize> = variables.iter().map(|v| v.ncols()).collect();
let total_cols: usize = grid_sizes.iter().sum();
let ncomp = config.ncomp.min(n).min(total_cols);
let mut means: Vec<Vec<f64>> = Vec::with_capacity(variables.len());
let mut scales: Vec<f64> = Vec::with_capacity(variables.len());
for var in variables.iter() {
let (_, m_p) = var.shape();
let mut mean = vec![0.0; m_p];
for j in 0..m_p {
let col = var.column(j);
mean[j] = col.iter().sum::<f64>() / n as f64;
}
let mut mean_var = 0.0;
for j in 0..m_p {
let col = var.column(j);
let var_j: f64 =
col.iter().map(|&v| (v - mean[j]).powi(2)).sum::<f64>() / (n as f64 - 1.0);
mean_var += var_j;
}
mean_var /= m_p as f64;
let scale = mean_var.sqrt();
means.push(mean);
scales.push(scale);
}
let max_scale = scales.iter().cloned().fold(0.0_f64, f64::max);
let scale_threshold = 1e-12 * max_scale.max(1e-15);
let mut stacked = FdMatrix::zeros(n, total_cols);
let mut col_offset = 0;
for (p, var) in variables.iter().enumerate() {
let m_p = grid_sizes[p];
let scale = if config.weighted && scales[p] > scale_threshold {
scales[p]
} else {
1.0
};
if !config.weighted {
scales[p] = 1.0;
}
for i in 0..n {
for j in 0..m_p {
let centered = var[(i, j)] - means[p][j];
stacked[(i, col_offset + j)] = centered / scale;
}
}
col_offset += m_p;
}
let svd = SVD::new(stacked.to_dmatrix(), true, true);
let v_t = svd
.v_t
.as_ref()
.ok_or_else(|| FdarError::ComputationFailed {
operation: "MFPCA SVD",
detail: "SVD failed to produce V_t matrix".to_string(),
})?;
let u = svd.u.as_ref().ok_or_else(|| FdarError::ComputationFailed {
operation: "MFPCA SVD",
detail: "SVD failed to produce U matrix".to_string(),
})?;
let singular_values: Vec<f64> = svd.singular_values.iter().take(ncomp).copied().collect();
let eigenvalues: Vec<f64> = singular_values
.iter()
.map(|&sv| sv * sv / (n as f64 - 1.0))
.collect();
let mut combined_rotation = FdMatrix::zeros(total_cols, ncomp);
for k in 0..ncomp {
for j in 0..total_cols {
combined_rotation[(j, k)] = v_t[(k, j)];
}
}
let mut scores = FdMatrix::zeros(n, ncomp);
for k in 0..ncomp {
let sv_k = singular_values[k];
for i in 0..n {
scores[(i, k)] = u[(i, k)] * sv_k;
}
}
let mut eigenfunctions = Vec::with_capacity(variables.len());
let mut col_off = 0;
for m_p in &grid_sizes {
let mut ef = FdMatrix::zeros(*m_p, ncomp);
for k in 0..ncomp {
for j in 0..*m_p {
ef[(j, k)] = combined_rotation[(col_off + j, k)];
}
}
col_off += m_p;
eigenfunctions.push(ef);
}
Ok(MfpcaResult {
scores,
eigenfunctions,
eigenvalues,
means,
scales,
grid_sizes,
combined_rotation,
scale_threshold,
})
}