use scirs2_core::ndarray::{Array1, Array2, Axis};
use super::basis::{compute_basis_coefficients, evaluate_basis, gcv_select_lambda};
use super::types::{FPCAResult, FunctionalConfig, FunctionalData};
use crate::error::{StatsError, StatsResult};
/// Performs functional principal component analysis (fPCA) on a sample of curves.
///
/// Pipeline: (1) smooth each observed curve onto the basis in `config`
/// (smoothing penalty chosen by GCV when `config.smoothing_param` is `None`),
/// (2) center the smoothed sample around the pointwise mean function,
/// (3) form the quadrature-weighted sample covariance matrix, and
/// (4) extract its leading eigenpairs, rescaling eigenvectors back into
/// eigenfunctions with unit L2 norm under the quadrature weights.
///
/// The effective number of components is `config.n_components` capped by both
/// the number of curves and the number of grid points.
///
/// # Errors
///
/// Returns `StatsError::InvalidArgument` when the effective component count is
/// zero, and propagates errors from basis evaluation, coefficient fitting,
/// GCV selection, the mean computation, and the eigendecomposition.
pub fn functional_pca(data: &FunctionalData, config: &FunctionalConfig) -> StatsResult<FPCAResult> {
    let n_curves = data.n_curves();
    let n_grid = data.n_grid();
    // Cannot extract more components than curves or grid points.
    let n_components = config.n_components.min(n_curves).min(n_grid);
    if n_components == 0 {
        return Err(StatsError::InvalidArgument(
            "n_components must be at least 1".to_string(),
        ));
    }

    // Basis matrix evaluated on the observation grid (n_grid x n_basis).
    let phi = evaluate_basis(&data.grid, &config.basis)?;
    // NOTE(review): when no smoothing parameter is supplied, GCV is run on the
    // FIRST curve only and that lambda is reused for every curve — confirm
    // this per-sample shortcut is intended rather than pooling all curves.
    let lambda = match config.smoothing_param {
        Some(lam) => lam,
        None => gcv_select_lambda(&data.observations[0], &data.grid, &config.basis)?,
    };

    // Smooth every curve via penalized basis projection.
    let mut smoothed = Array2::<f64>::zeros((n_curves, n_grid));
    for (i, obs) in data.observations.iter().enumerate() {
        let coeffs = compute_basis_coefficients(obs, &phi, lambda)?;
        let fitted = phi.dot(&coeffs);
        for j in 0..n_grid {
            smoothed[[i, j]] = fitted[j];
        }
    }

    // Center around the pointwise mean function.
    let mean_func = smoothed.mean_axis(Axis(0)).ok_or_else(|| {
        StatsError::ComputationError("Failed to compute mean function".to_string())
    })?;
    let mut centered = Array2::<f64>::zeros((n_curves, n_grid));
    for i in 0..n_curves {
        for j in 0..n_grid {
            centered[[i, j]] = smoothed[[i, j]] - mean_func[j];
        }
    }

    // Quadrature weights (midpoint spacings) so grid sums approximate
    // integrals over the domain.
    let dt = compute_grid_spacing(&data.grid);
    // Unbiased normalization; guard the single-curve case against /0.
    let n_minus_1 = if n_curves > 1 {
        (n_curves - 1) as f64
    } else {
        1.0
    };
    let cov_matrix = centered.t().dot(&centered) / n_minus_1;

    // Symmetric sqrt(dt) weighting turns the covariance-operator eigenproblem
    // into an ordinary symmetric matrix eigenproblem.
    let sqrt_dt: Array1<f64> = dt.iter().map(|d| d.sqrt()).collect::<Vec<_>>().into();
    let mut weighted_cov = Array2::<f64>::zeros((n_grid, n_grid));
    for i in 0..n_grid {
        for j in 0..n_grid {
            weighted_cov[[i, j]] = sqrt_dt[i] * cov_matrix[[i, j]] * sqrt_dt[j];
        }
    }

    // FIX: total variance is the trace of the weighted covariance (the sum of
    // ALL its eigenvalues), not just the sum of the retained ones. With the
    // retained-only denominator, variance_explained always summed to exactly
    // 1 regardless of how many components were discarded.
    let total_variance: f64 = (0..n_grid).map(|i| weighted_cov[[i, i]]).sum();

    let (eigenvalues, eigenvectors) = symmetric_eigen_decomposition(&weighted_cov, n_components)?;

    // Undo the sqrt(dt) weighting and renormalize each eigenfunction to unit
    // L2 norm under the quadrature weights.
    let mut eigenfunctions = Array2::<f64>::zeros((n_components, n_grid));
    for k in 0..n_components {
        for j in 0..n_grid {
            let inv_sqrt = if sqrt_dt[j].abs() > 1e-14 {
                1.0 / sqrt_dt[j]
            } else {
                // Degenerate (zero-width) grid cell: eigenfunction value is
                // not recoverable there; leave it at zero.
                0.0
            };
            eigenfunctions[[k, j]] = eigenvectors[[j, k]] * inv_sqrt;
        }
        let norm_sq: f64 = (0..n_grid)
            .map(|j| eigenfunctions[[k, j]].powi(2) * dt[j])
            .sum();
        let norm = norm_sq.sqrt();
        if norm > 1e-14 {
            for j in 0..n_grid {
                eigenfunctions[[k, j]] /= norm;
            }
        }
    }

    // Scores: quadrature inner product of each centered curve with each
    // eigenfunction.
    let mut scores = Array2::<f64>::zeros((n_curves, n_components));
    for i in 0..n_curves {
        for k in 0..n_components {
            let mut score = 0.0;
            for j in 0..n_grid {
                score += centered[[i, j]] * eigenfunctions[[k, j]] * dt[j];
            }
            scores[[i, k]] = score;
        }
    }

    // Proportion of total variance captured by each retained component; all
    // zeros when the sample carries (numerically) no variance at all.
    let variance_explained = if total_variance > 1e-14 {
        &eigenvalues / total_variance
    } else {
        Array1::<f64>::zeros(n_components)
    };

    Ok(FPCAResult {
        eigenvalues,
        eigenfunctions,
        scores,
        variance_explained,
        grid: data.grid.clone(),
    })
}
/// Rebuilds each curve from its leading `n_components` fPCA scores.
///
/// The requested component count is clamped to what `result` actually holds.
/// Each output curve is the score-weighted sum of eigenfunctions evaluated on
/// the stored grid.
///
/// NOTE(review): no mean function is added back (the result struct carries
/// none), so these are reconstructions of the *centered* curves — presumably
/// callers re-add the sample mean themselves; confirm at call sites.
pub fn reconstruct_from_scores(result: &FPCAResult, n_components: usize) -> Vec<Vec<f64>> {
    let n_grid = result.grid.len();
    let k = n_components
        .min(result.eigenvalues.len())
        .min(result.eigenfunctions.nrows());
    (0..result.scores.nrows())
        .map(|curve_idx| {
            (0..n_grid)
                .map(|j| {
                    // Sum over components in ascending order (same
                    // accumulation order as a per-component loop).
                    (0..k)
                        .map(|comp| {
                            result.scores[[curve_idx, comp]] * result.eigenfunctions[[comp, j]]
                        })
                        .sum()
                })
                .collect()
        })
        .collect()
}
/// Computes midpoint quadrature weights for a (possibly nonuniform) grid.
///
/// Interior point `i` gets half the span of its two neighbors,
/// `(grid[i+1] - grid[i-1]) / 2`; each endpoint gets half the width of its
/// single adjacent cell. Grids with fewer than two points fall back to unit
/// weights.
fn compute_grid_spacing(grid: &[f64]) -> Vec<f64> {
    let n = grid.len();
    if n <= 1 {
        return vec![1.0; n];
    }
    let mut spacing = Vec::with_capacity(n);
    spacing.push((grid[1] - grid[0]) / 2.0);
    for i in 1..n - 1 {
        spacing.push((grid[i + 1] - grid[i - 1]) / 2.0);
    }
    spacing.push((grid[n - 1] - grid[n - 2]) / 2.0);
    spacing
}
/// Computes the top-`k` eigenpairs of the symmetric matrix `a` using power
/// iteration with Hotelling deflation.
///
/// Eigenvalues are clamped at zero (round-off on a positive semi-definite
/// input can yield tiny negatives) and come out in the order found, which is
/// descending in magnitude; eigenvectors are returned as the columns of the
/// second array. `k` is clamped to the matrix dimension.
fn symmetric_eigen_decomposition(
    a: &Array2<f64>,
    k: usize,
) -> StatsResult<(Array1<f64>, Array2<f64>)> {
    let n = a.nrows();
    let k = k.min(n);
    let max_iter = 1000;
    let tol = 1e-12;
    let mut eigenvalues = Array1::<f64>::zeros(k);
    let mut eigenvectors = Array2::<f64>::zeros((n, k));
    let mut deflated = a.clone();
    for comp in 0..k {
        // FIX: deterministic pseudo-random start vector with strictly
        // positive entries. The previous alternating +/-1 start is exactly
        // orthogonal to any constant eigenvector when n is even (so power
        // iteration reported a spurious zero eigenvalue, e.g. for a rank-1
        // all-ones matrix) and nearly orthogonal to the smooth eigenvectors
        // typical of functional covariance matrices. A generic start vector
        // has nonzero projection onto every eigenvector except on a
        // measure-zero set.
        let mut v = Array1::<f64>::zeros(n);
        for i in 0..n {
            v[i] = ((i * 7 + comp * 3 + 1) as f64).sin() + 1.5;
        }
        let norm: f64 = v.iter().map(|x| x * x).sum::<f64>().sqrt();
        if norm > 1e-14 {
            v /= norm;
        }
        let mut eigenvalue = 0.0;
        for _iter in 0..max_iter {
            let w = deflated.dot(&v);
            // Rayleigh quotient estimate (v has unit length).
            let new_eigenvalue: f64 = v.iter().zip(w.iter()).map(|(a, b)| a * b).sum();
            let w_norm = w.iter().map(|x| x * x).sum::<f64>().sqrt();
            if w_norm < 1e-14 {
                // v lies (numerically) in the null space of the deflated
                // matrix: no further nonzero eigenvalue reachable here.
                eigenvalue = 0.0;
                break;
            }
            let new_v = &w / w_norm;
            // Relative convergence test on the eigenvalue estimate.
            if (new_eigenvalue - eigenvalue).abs() < tol * (1.0 + eigenvalue.abs()) {
                eigenvalue = new_eigenvalue;
                v = new_v;
                break;
            }
            eigenvalue = new_eigenvalue;
            v = new_v;
        }
        // Clamp tiny negative round-off on PSD input to zero.
        eigenvalues[comp] = eigenvalue.max(0.0);
        for i in 0..n {
            eigenvectors[[i, comp]] = v[i];
        }
        // Hotelling deflation: subtract the found component so the next
        // power iteration converges to the next-largest eigenpair.
        for i in 0..n {
            for j in 0..n {
                deflated[[i, j]] -= eigenvalue * v[i] * v[j];
            }
        }
    }
    Ok((eigenvalues, eigenvectors))
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::functional::types::BasisType;

    /// Uniform grid of `n` points: 0, 1/n, ..., (n-1)/n.
    fn unit_grid(n: usize) -> Vec<f64> {
        (0..n).map(|i| i as f64 / n as f64).collect()
    }

    #[test]
    fn test_fpca_sine_cosine_mixture() {
        // Curves are a two-dimensional sin/cos mixture plus tiny noise, so two
        // components should capture essentially all variance.
        let n_grid = 100;
        let n_curves = 50;
        let grid = unit_grid(n_grid);
        let two_pi = 2.0 * std::f64::consts::PI;
        let observations: Vec<Vec<f64>> = (0..n_curves)
            .map(|i| {
                let a = (i as f64 * 0.37).sin() * 2.0;
                let b = (i as f64 * 0.73).cos() * 1.0;
                grid.iter()
                    .map(|&t| {
                        a * (two_pi * t).sin()
                            + b * (two_pi * t).cos()
                            + 0.01 * (i as f64 * t * 17.3).sin()
                    })
                    .collect()
            })
            .collect();
        let data = FunctionalData::new(grid, observations).expect("Data creation should succeed");
        let config = FunctionalConfig {
            basis: BasisType::BSpline {
                n_basis: 20,
                degree: 3,
            },
            smoothing_param: Some(1e-6),
            n_components: 4,
        };
        let result = functional_pca(&data, &config).expect("fPCA should succeed");

        let top2_var: f64 = result.variance_explained[0] + result.variance_explained[1];
        assert!(
            top2_var > 0.95,
            "Top 2 components should explain >95% variance, got {}",
            top2_var
        );
        for i in 1..result.eigenvalues.len() {
            assert!(
                result.eigenvalues[i] <= result.eigenvalues[i - 1] + 1e-10,
                "Eigenvalues should be descending"
            );
        }
    }

    #[test]
    fn test_fpca_variance_explained_sums_to_one() {
        let n_grid = 50;
        let n_curves = 30;
        let grid = unit_grid(n_grid);
        let observations: Vec<Vec<f64>> = (0..n_curves)
            .map(|i| {
                let slope = (i as f64 * 0.5).sin();
                let quad = (i as f64 * 0.3).cos();
                grid.iter().map(|&t| slope * t + quad * t * t).collect()
            })
            .collect();
        let data = FunctionalData::new(grid, observations).expect("Data creation should succeed");
        let config = FunctionalConfig {
            basis: BasisType::Fourier { n_basis: 11 },
            smoothing_param: Some(1e-4),
            n_components: 5,
        };
        let result = functional_pca(&data, &config).expect("fPCA should succeed");

        assert!(
            result.variance_explained.iter().all(|&ve| ve >= -1e-10),
            "Variance explained should be non-negative"
        );
        let total: f64 = result.variance_explained.iter().sum();
        assert!(
            total <= 1.0 + 1e-6,
            "Sum of variance explained should be <= 1.0, got {}",
            total
        );
    }

    #[test]
    fn test_reconstruct_from_scores() {
        let n_grid = 50;
        let n_curves = 20;
        let grid = unit_grid(n_grid);
        let two_pi = 2.0 * std::f64::consts::PI;
        let observations: Vec<Vec<f64>> = (0..n_curves)
            .map(|i| {
                grid.iter()
                    .map(|&t| {
                        (i as f64 * 0.4).sin() * (two_pi * t).sin() + (i as f64 * 0.7).cos() * t
                    })
                    .collect()
            })
            .collect();
        let data = FunctionalData::new(grid, observations).expect("Data creation should succeed");
        let config = FunctionalConfig {
            basis: BasisType::BSpline {
                n_basis: 15,
                degree: 3,
            },
            smoothing_param: Some(1e-5),
            n_components: 3,
        };
        let result = functional_pca(&data, &config).expect("fPCA should succeed");

        // Full-rank reconstruction has one curve per input, each on the grid.
        let full = reconstruct_from_scores(&result, 3);
        assert_eq!(full.len(), n_curves);
        assert_eq!(full[0].len(), n_grid);
        // Truncated reconstruction still covers every curve.
        let truncated = reconstruct_from_scores(&result, 1);
        assert_eq!(truncated.len(), n_curves);
    }

    #[test]
    fn test_fpca_eigenfunctions_orthogonal() {
        let n_grid = 60;
        let n_curves = 30;
        let grid = unit_grid(n_grid);
        let pi = std::f64::consts::PI;
        let observations: Vec<Vec<f64>> = (0..n_curves)
            .map(|i| {
                let c1 = (i as f64 * 0.5).sin();
                let c2 = (i as f64 * 0.3).cos();
                let c3 = (i as f64 * 0.7).sin();
                grid.iter()
                    .map(|&t| c1 * (2.0 * pi * t).sin() + c2 * (4.0 * pi * t).cos() + c3 * t)
                    .collect()
            })
            .collect();
        let data =
            FunctionalData::new(grid.clone(), observations).expect("Data creation should succeed");
        let config = FunctionalConfig {
            basis: BasisType::BSpline {
                n_basis: 15,
                degree: 3,
            },
            smoothing_param: Some(1e-5),
            n_components: 3,
        };
        let result = functional_pca(&data, &config).expect("fPCA should succeed");

        // Same quadrature weights the fPCA routine uses internally.
        let dt = compute_grid_spacing(&grid);
        let n_funcs = result.eigenfunctions.nrows();
        for k1 in 0..n_funcs {
            for k2 in (k1 + 1)..n_funcs {
                let inner: f64 = (0..n_grid)
                    .map(|j| {
                        result.eigenfunctions[[k1, j]] * result.eigenfunctions[[k2, j]] * dt[j]
                    })
                    .sum();
                assert!(
                    inner.abs() < 0.1,
                    "Eigenfunctions {} and {} should be orthogonal, inner product = {}",
                    k1,
                    k2,
                    inner
                );
            }
        }
    }
}