use crate::error::FdarError;
use crate::matrix::FdMatrix;
use nalgebra::SVD;
use super::{cross_distance_matrix, self_distance_matrix};
fn center_data(data: &FdMatrix) -> (nalgebra::DMatrix<f64>, Vec<f64>) {
let n = data.nrows();
let m = data.ncols();
let mut means = vec![0.0; m];
for j in 0..m {
let mut sum = 0.0;
for i in 0..n {
sum += data[(i, j)];
}
means[j] = sum / n as f64;
}
let mut centered = nalgebra::DMatrix::zeros(n, m);
for i in 0..n {
for j in 0..m {
centered[(i, j)] = data[(i, j)] - means[j];
}
}
(centered, means)
}
fn project_scores(
data: &FdMatrix,
means: &[f64],
v_t: &nalgebra::DMatrix<f64>,
ncomp: usize,
) -> Vec<Vec<f64>> {
let n = data.nrows();
let m = data.ncols();
(0..n)
.map(|i| {
(0..ncomp)
.map(|k| {
let mut s = 0.0;
for j in 0..m {
s += (data[(i, j)] - means[j]) * v_t[(k, j)];
}
s
})
.collect()
})
.collect()
}
fn score_distance(a: &[f64], b: &[f64]) -> f64 {
a.iter()
.zip(b.iter())
.map(|(x, y)| (x - y).powi(2))
.sum::<f64>()
.sqrt()
}
pub fn pca_self_1d(data: &FdMatrix, ncomp: usize) -> Result<FdMatrix, FdarError> {
let n = data.nrows();
let m = data.ncols();
if n < 2 {
return Err(FdarError::InvalidDimension {
parameter: "data",
expected: "at least 2 rows".to_string(),
actual: format!("{n} rows"),
});
}
if ncomp == 0 || ncomp > n.min(m) {
return Err(FdarError::InvalidParameter {
parameter: "ncomp",
message: format!("must be in 1..={}, got {ncomp}", n.min(m)),
});
}
let (centered, _means) = center_data(data);
let svd = SVD::new(centered, true, true);
let v_t = svd
.v_t
.as_ref()
.ok_or_else(|| FdarError::ComputationFailed {
operation: "SVD",
detail: "failed to compute V^T matrix".to_string(),
})?;
let u = svd.u.as_ref().ok_or_else(|| FdarError::ComputationFailed {
operation: "SVD",
detail: "failed to compute U matrix".to_string(),
})?;
let scores: Vec<Vec<f64>> = (0..n)
.map(|i| {
(0..ncomp)
.map(|k| u[(i, k)] * svd.singular_values[k])
.collect()
})
.collect();
let _ = v_t;
Ok(self_distance_matrix(n, |i, j| {
score_distance(&scores[i], &scores[j])
}))
}
pub fn pca_cross_1d(
data1: &FdMatrix,
data2: &FdMatrix,
ncomp: usize,
) -> Result<FdMatrix, FdarError> {
let n1 = data1.nrows();
let n2 = data2.nrows();
let m = data1.ncols();
if n1 < 2 {
return Err(FdarError::InvalidDimension {
parameter: "data1",
expected: "at least 2 rows".to_string(),
actual: format!("{n1} rows"),
});
}
if data2.ncols() != m {
return Err(FdarError::InvalidDimension {
parameter: "data2",
expected: format!("{m} columns (matching data1)"),
actual: format!("{} columns", data2.ncols()),
});
}
if ncomp == 0 || ncomp > n1.min(m) {
return Err(FdarError::InvalidParameter {
parameter: "ncomp",
message: format!("must be in 1..={}, got {ncomp}", n1.min(m)),
});
}
let (centered, means) = center_data(data1);
let svd = SVD::new(centered, true, true);
let v_t = svd
.v_t
.as_ref()
.ok_or_else(|| FdarError::ComputationFailed {
operation: "SVD",
detail: "failed to compute V^T matrix".to_string(),
})?;
let scores1 = project_scores(data1, &means, v_t, ncomp);
let scores2 = project_scores(data2, &means, v_t, ncomp);
Ok(cross_distance_matrix(n1, n2, |i, j| {
score_distance(&scores1[i], &scores2[j])
}))
}