use super::srsf::reparameterize_curve;
use super::{
dp_alignment_core, dp_edge_weight, dp_grid_solve, dp_lambda_penalty, dp_path_to_gamma,
};
use crate::error::FdarError;
use crate::helpers::{cumulative_trapz, l2_distance, simpsons_weights};
use crate::iter_maybe_parallel;
use crate::matrix::{FdCurveSet, FdMatrix};
#[cfg(feature = "parallel")]
use rayon::iter::ParallelIterator;
/// Result of elastically aligning one d-dimensional curve onto another.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub struct AlignmentResultNd {
    /// Warping function evaluated on the common grid (same length as `argvals`).
    pub gamma: Vec<f64>,
    /// The second curve reparameterized by `gamma`, one `Vec` per dimension.
    pub f_aligned: Vec<Vec<f64>>,
    /// Elastic distance: L2 norm (Simpson-weighted) between the SRSFs,
    /// summed over dimensions and square-rooted.
    pub distance: f64,
}
/// Writes the SRVF value q_k = f'_k / sqrt(|f'|) for grid point (i, j) of
/// every dimension, reading the derivatives from `derivs`.
///
/// Points where the curve is numerically stationary (|f'| below 1e-15) are
/// set to zero in all dimensions.
#[inline]
fn srsf_scale_point(derivs: &[FdMatrix], result_dims: &mut [FdMatrix], i: usize, j: usize) {
    // Speed of the curve at (i, j): Euclidean norm of the derivative vector.
    let speed: f64 = derivs
        .iter()
        .map(|dm| dm[(i, j)] * dm[(i, j)])
        .sum::<f64>()
        .sqrt();
    if speed < 1e-15 {
        for (k, _) in derivs.iter().enumerate() {
            result_dims[k][(i, j)] = 0.0;
        }
    } else {
        // 1 / sqrt(|f'|): the square-root slope scaling.
        let inv = speed.sqrt().recip();
        for (k, dm) in derivs.iter().enumerate() {
            result_dims[k][(i, j)] = dm[(i, j)] * inv;
        }
    }
}
/// Square-root slope transform of a set of d-dimensional curves sampled on
/// `argvals`.
///
/// Each output component is q_k = f'_k / sqrt(|f'|). Degenerate input
/// (empty data or a grid length mismatch) yields all-zero matrices of the
/// same shape.
pub fn srsf_transform_nd(data: &FdCurveSet, argvals: &[f64]) -> FdCurveSet {
    let (d, n, m) = (data.ndim(), data.ncurves(), data.npoints());
    if d == 0 || n == 0 || m == 0 || argvals.len() != m {
        let dims = (0..d).map(|_| FdMatrix::zeros(n, m)).collect();
        return FdCurveSet { dims };
    }
    // First derivative of every dimension on the shared grid.
    let derivs: Vec<FdMatrix> = data
        .dims
        .iter()
        .map(|dm| crate::fdata::deriv_1d(dm, argvals, 1))
        .collect();
    let mut out: Vec<FdMatrix> = (0..d).map(|_| FdMatrix::zeros(n, m)).collect();
    for i in 0..n {
        for j in 0..m {
            srsf_scale_point(&derivs, &mut out, i, j);
        }
    }
    FdCurveSet { dims: out }
}
/// Inverse SRVF transform: reconstructs f from its SRSF `q` and the initial
/// values `f0` (one per dimension) via f_k(t) = f0_k + ∫ q_k |q| dt.
pub fn srsf_inverse_nd(q: &[Vec<f64>], argvals: &[f64], f0: &[f64]) -> Vec<Vec<f64>> {
    let d = q.len();
    if d == 0 {
        return Vec::new();
    }
    let m = q[0].len();
    if m == 0 {
        return vec![Vec::new(); d];
    }
    // Pointwise Euclidean norm |q(t_j)| across dimensions.
    let norms: Vec<f64> = (0..m)
        .map(|j| q.iter().map(|qk| qk[j] * qk[j]).sum::<f64>().sqrt())
        .collect();
    q.iter()
        .enumerate()
        .map(|(k, qk)| {
            let integrand: Vec<f64> = qk.iter().zip(&norms).map(|(&v, &nr)| v * nr).collect();
            // Cumulative trapezoid integral, shifted by the starting value.
            cumulative_trapz(&integrand, argvals)
                .iter()
                .map(|&v| f0[k] + v)
                .collect()
        })
        .collect()
}
/// Dynamic-programming optimal warping between two d-dimensional SRSFs.
///
/// Falls back to the scalar core for d == 1. For d > 1, each component is
/// L2-normalized (norm floored at 1e-10) and the DP edge weights are summed
/// over dimensions, plus the lambda roughness penalty.
fn dp_alignment_core_nd(
    q1: &[Vec<f64>],
    q2: &[Vec<f64>],
    argvals: &[f64],
    lambda: f64,
) -> Vec<f64> {
    let d = q1.len();
    let m = argvals.len();
    if m < 2 || d == 0 {
        // Nothing to align; the identity warp is just the grid itself.
        return argvals.to_vec();
    }
    if d == 1 {
        return dp_alignment_core(&q1[0], &q2[0], argvals, lambda);
    }
    // Scale each component to unit L2 norm so no dimension dominates.
    let normalize = |qs: &[Vec<f64>]| -> Vec<Vec<f64>> {
        qs.iter()
            .map(|qk| {
                let nrm = qk.iter().map(|&v| v * v).sum::<f64>().sqrt().max(1e-10);
                qk.iter().map(|&v| v / nrm).collect()
            })
            .collect()
    };
    let q1n = normalize(q1);
    let q2n = normalize(q2);
    let path = dp_grid_solve(m, m, |sr, sc, tr, tc| {
        let base: f64 = (0..d)
            .map(|k| dp_edge_weight(&q1n[k], &q2n[k], argvals, sc, tc, sr, tr))
            .sum();
        base + dp_lambda_penalty(argvals, sc, tc, sr, tr, lambda)
    });
    dp_path_to_gamma(&path, argvals)
}
/// Elastically aligns the single curve `f2` onto `f1` (both d-dimensional,
/// sampled on `argvals`) and reports the warp, the warped curve, and the
/// elastic distance.
pub fn elastic_align_pair_nd(
    f1: &FdCurveSet,
    f2: &FdCurveSet,
    argvals: &[f64],
    lambda: f64,
) -> AlignmentResultNd {
    let d = f1.ndim();
    let m = f1.npoints();
    let q1_set = srsf_transform_nd(f1, argvals);
    let q2_set = srsf_transform_nd(f2, argvals);
    let first_rows =
        |set: &FdCurveSet| -> Vec<Vec<f64>> { set.dims.iter().map(|dm| dm.row(0)).collect() };
    let q1 = first_rows(&q1_set);
    let q2 = first_rows(&q2_set);
    // Optimal warping computed in SRSF space.
    let gamma = dp_alignment_core_nd(&q1, &q2, argvals, lambda);
    // Apply the warp to every dimension of f2.
    let f_aligned: Vec<Vec<f64>> = f2
        .dims
        .iter()
        .map(|dm| reparameterize_curve(&dm.row(0), argvals, &gamma))
        .collect();
    let aligned_set = FdCurveSet {
        dims: f_aligned
            .iter()
            .map(|fa| {
                FdMatrix::from_slice(fa, 1, m).expect("dimension invariant: data.len() == n * m")
            })
            .collect(),
    };
    // Distance: Simpson-weighted L2 between q1 and the SRSF of the warped
    // f2, accumulated over dimensions.
    let q_aligned = srsf_transform_nd(&aligned_set, argvals);
    let weights = simpsons_weights(argvals);
    let mut dist_sq = 0.0;
    for k in 0..d {
        let dk = l2_distance(&q1_set.dims[k].row(0), &q_aligned.dims[k].row(0), &weights);
        dist_sq += dk * dk;
    }
    AlignmentResultNd {
        gamma,
        f_aligned,
        distance: dist_sq.sqrt(),
    }
}
/// Elastic distance between two d-dimensional curves; the full alignment
/// result is computed and discarded.
pub fn elastic_distance_nd(f1: &FdCurveSet, f2: &FdCurveSet, argvals: &[f64], lambda: f64) -> f64 {
    let aligned = elastic_align_pair_nd(f1, f2, argvals, lambda);
    aligned.distance
}
/// Output of the elastic Karcher mean computation for a set of
/// d-dimensional curves.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub struct KarcherMeanResultNd {
    /// Cross-sectional mean of the aligned curves, one `Vec` per dimension.
    pub mean: Vec<Vec<f64>>,
    /// SRSF of `mean`, one `Vec` per dimension.
    pub mean_srsf: Vec<Vec<f64>>,
    /// Centered warping functions, one row per input curve (n x m).
    pub gammas: FdMatrix,
    /// Warp-aligned input curves, one (n x m) matrix per dimension.
    pub aligned_data: Vec<FdMatrix>,
    /// Number of mean-update iterations actually performed.
    pub n_iter: usize,
    /// True when the relative SRSF change dropped below tolerance.
    pub converged: bool,
}
/// Output of functional PCA on aligned multi-dimensional curves.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub struct PcaNdResult {
    /// Per-curve component scores, shape (n, ncomp).
    pub scores: FdMatrix,
    /// Component loadings, one (ncomp, m) matrix per dimension.
    pub components: Vec<FdMatrix>,
    /// Retained covariance eigenvalues, one per component.
    pub explained_variance: Vec<f64>,
    /// Running fraction of total variance explained by the first k components.
    pub cumulative_variance: Vec<f64>,
    /// Same values as `explained_variance` (both are the retained eigenvalues).
    pub covariance_eigenvalues: Vec<f64>,
}
fn srsf_single_nd(curve: &[Vec<f64>], argvals: &[f64]) -> Vec<Vec<f64>> {
let m = argvals.len();
let dims: Vec<FdMatrix> = curve
.iter()
.map(|c| FdMatrix::from_slice(c, 1, m).expect("dimension invariant: data.len() == n * m"))
.collect();
let cs = FdCurveSet { dims };
let q_set = srsf_transform_nd(&cs, argvals);
q_set.dims.iter().map(|dm| dm.row(0)).collect()
}
/// Relative Frobenius change ||new - old|| / ||old|| over all dimensions,
/// with the denominator floored at 1e-10 to avoid division by zero.
fn relative_change_nd(old: &[Vec<f64>], new: &[Vec<f64>]) -> f64 {
    let (diff_sq, old_sq) = old
        .iter()
        .zip(new)
        .fold((0.0_f64, 0.0_f64), |acc, (qo, qn)| {
            qo.iter().zip(qn).fold(acc, |(ds, os), (&a, &b)| {
                (ds + (a - b) * (a - b), os + a * a)
            })
        });
    diff_sq.sqrt() / old_sq.sqrt().max(1e-10)
}
/// Index of the curve whose SRSF is closest (pointwise squared distance) to
/// the cross-sectional mean SRSF; used to seed the Karcher mean iteration.
fn select_template_nd(data: &[FdCurveSet], srsfs: &[Vec<Vec<f64>>]) -> usize {
    let n = data.len();
    let d = srsfs[0].len();
    let m = srsfs[0][0].len();
    // Accumulate, then scale: pointwise average SRSF over all curves.
    let mut mean_q = vec![vec![0.0_f64; m]; d];
    for q in srsfs {
        for (acc_k, q_k) in mean_q.iter_mut().zip(q) {
            for (acc, &v) in acc_k.iter_mut().zip(q_k) {
                *acc += v;
            }
        }
    }
    for acc_k in &mut mean_q {
        for acc in acc_k {
            *acc /= n as f64;
        }
    }
    // Strict '<' keeps the first curve in case of exact ties.
    let mut best = (f64::INFINITY, 0usize);
    for (i, q) in srsfs.iter().enumerate() {
        let mut dist_sq = 0.0;
        for (q_k, m_k) in q.iter().zip(&mean_q) {
            for (&a, &b) in q_k.iter().zip(m_k) {
                dist_sq += (a - b) * (a - b);
            }
        }
        if dist_sq < best.0 {
            best = (dist_sq, i);
        }
    }
    best.1
}
/// Karcher mean of a set of d-dimensional curves under the elastic (SRSF)
/// metric.
///
/// Iteratively aligns every curve to the current template, averages the
/// aligned SRSFs, and maps the average back to function space until the
/// relative change of the mean SRSF drops below `tol` or `max_iter` is
/// reached. The warps are then centered by the inverse of their Karcher
/// mean, and the raw curves re-warped with the centered gammas.
///
/// # Errors
/// Returns `FdarError::InvalidDimension` when fewer than two curves are
/// given, the grid is degenerate (d == 0, m < 2, or `argvals` length
/// mismatch), or the curves disagree in shape.
#[must_use = "expensive computation whose result should not be discarded"]
pub fn karcher_mean_nd(
    data: &[FdCurveSet],
    argvals: &[f64],
    max_iter: usize,
    tol: f64,
    lambda: f64,
) -> Result<KarcherMeanResultNd, FdarError> {
    let n = data.len();
    if n < 2 {
        return Err(FdarError::InvalidDimension {
            parameter: "data",
            expected: "at least 2 curves".to_string(),
            actual: format!("{n}"),
        });
    }
    let d = data[0].ndim();
    let m = data[0].npoints();
    if d == 0 || m < 2 || argvals.len() != m {
        return Err(FdarError::InvalidDimension {
            parameter: "data/argvals",
            expected: format!("d > 0, m >= 2, argvals.len() == m (m={m})"),
            actual: format!("d={d}, m={m}, argvals.len()={}", argvals.len()),
        });
    }
    for (i, cs) in data.iter().enumerate() {
        if cs.ndim() != d || cs.npoints() != m {
            return Err(FdarError::InvalidDimension {
                parameter: "data",
                expected: format!("all curves d={d}, m={m}"),
                actual: format!("curve {i}: d={}, m={}", cs.ndim(), cs.npoints()),
            });
        }
    }
    // Per-curve sample vectors (one Vec per dimension) and their SRSFs.
    let curves: Vec<Vec<Vec<f64>>> = (0..n)
        .map(|i| data[i].dims.iter().map(|dm| dm.row(0)).collect())
        .collect();
    let srsfs: Vec<Vec<Vec<f64>>> = curves.iter().map(|c| srsf_single_nd(c, argvals)).collect();
    // Hoisted out of the iteration loop: each input curve's FdCurveSet view
    // never changes, so build it once instead of once per (iteration, curve).
    let curve_sets: Vec<FdCurveSet> = curves
        .iter()
        .map(|c| {
            let dims: Vec<FdMatrix> = c
                .iter()
                .map(|v| {
                    FdMatrix::from_slice(v, 1, m)
                        .expect("dimension invariant: data.len() == n * m")
                })
                .collect();
            FdCurveSet { dims }
        })
        .collect();
    // Seed the template with the curve closest to the mean SRSF.
    let template_idx = select_template_nd(data, &srsfs);
    let mut mu_q = srsfs[template_idx].clone();
    let mut mu_f = curves[template_idx].clone();
    let mut converged = false;
    let mut n_iter = 0;
    let mut gammas = FdMatrix::zeros(n, m);
    for iter in 0..max_iter {
        n_iter = iter + 1;
        // Hoisted out of the per-curve map: the template depends only on
        // mu_f, not on the curve index.
        let mean_cs = {
            let dims: Vec<FdMatrix> = mu_f
                .iter()
                .map(|v| {
                    FdMatrix::from_slice(v, 1, m)
                        .expect("dimension invariant: data.len() == n * m")
                })
                .collect();
            FdCurveSet { dims }
        };
        // Align every curve to the current template (parallel when the
        // "parallel" feature is enabled).
        let align_results: Vec<(Vec<f64>, Vec<Vec<f64>>)> = iter_maybe_parallel!(0..n)
            .map(|i| {
                let result = elastic_align_pair_nd(&mean_cs, &curve_sets[i], argvals, lambda);
                (result.gamma, result.f_aligned)
            })
            .collect();
        // Average the SRSFs of the aligned curves to get the new template.
        let mut new_mu_q: Vec<Vec<f64>> = vec![vec![0.0; m]; d];
        for (i, (gamma, f_aligned)) in align_results.iter().enumerate() {
            for j in 0..m {
                gammas[(i, j)] = gamma[j];
            }
            let q_aligned = srsf_single_nd(f_aligned, argvals);
            for k in 0..d {
                for j in 0..m {
                    new_mu_q[k][j] += q_aligned[k][j];
                }
            }
        }
        for k in 0..d {
            for j in 0..m {
                new_mu_q[k][j] /= n as f64;
            }
        }
        let rel = relative_change_nd(&mu_q, &new_mu_q);
        mu_q = new_mu_q;
        // Map the mean SRSF back to function space, anchored at the
        // previous template's starting values.
        let f0: Vec<f64> = mu_f.iter().map(|v| v[0]).collect();
        mu_f = srsf_inverse_nd(&mu_q, argvals, &f0);
        if rel < tol {
            converged = true;
            break;
        }
    }
    // Center the warps: compose each gamma with the inverse of their
    // Karcher mean so that the average warp is (close to) the identity.
    let gam_inv = super::sqrt_mean_inverse(&gammas, argvals);
    for i in 0..n {
        let gam_i: Vec<f64> = (0..m).map(|j| gammas[(i, j)]).collect();
        let gam_centered = reparameterize_curve(&gam_i, argvals, &gam_inv);
        for j in 0..m {
            gammas[(i, j)] = gam_centered[j];
        }
    }
    // Re-warp the raw input curves with the centered gammas.
    let mut aligned_data: Vec<FdMatrix> = (0..d).map(|_| FdMatrix::zeros(n, m)).collect();
    for i in 0..n {
        let gamma_i: Vec<f64> = (0..m).map(|j| gammas[(i, j)]).collect();
        for k in 0..d {
            let f_aligned = reparameterize_curve(&curves[i][k], argvals, &gamma_i);
            for j in 0..m {
                aligned_data[k][(i, j)] = f_aligned[j];
            }
        }
    }
    // Cross-sectional mean of the aligned curves and its SRSF.
    let mut mean: Vec<Vec<f64>> = vec![vec![0.0; m]; d];
    for k in 0..d {
        for j in 0..m {
            for i in 0..n {
                mean[k][j] += aligned_data[k][(i, j)];
            }
            mean[k][j] /= n as f64;
        }
    }
    let mean_srsf = srsf_single_nd(&mean, argvals);
    Ok(KarcherMeanResultNd {
        mean,
        mean_srsf,
        gammas,
        aligned_data,
        n_iter,
        converged,
    })
}
/// Sample covariance (unbiased, n - 1 denominator) of the aligned curves,
/// stacked dimension-major into a (d*m) x (d*m) matrix.
///
/// # Errors
/// Rejects empty data, an `argvals` length mismatch, d*m > 10000 (the
/// covariance matrix would be too large), and n < 2.
#[must_use = "expensive computation whose result should not be discarded"]
pub fn karcher_covariance_nd(
    result: &KarcherMeanResultNd,
    argvals: &[f64],
) -> Result<FdMatrix, FdarError> {
    let d = result.aligned_data.len();
    if d == 0 {
        return Err(FdarError::InvalidDimension {
            parameter: "aligned_data",
            expected: "d > 0".to_string(),
            actual: "0".to_string(),
        });
    }
    let (n, m) = result.aligned_data[0].shape();
    if argvals.len() != m {
        return Err(FdarError::InvalidDimension {
            parameter: "argvals",
            expected: format!("{m}"),
            actual: format!("{}", argvals.len()),
        });
    }
    let dm = d * m;
    if dm > 10_000 {
        return Err(FdarError::InvalidParameter {
            parameter: "d*m",
            message: format!(
                "d*m = {dm} exceeds limit of 10000; covariance matrix would be too large"
            ),
        });
    }
    if n < 2 {
        return Err(FdarError::InvalidDimension {
            parameter: "aligned_data",
            expected: "n >= 2".to_string(),
            actual: format!("{n}"),
        });
    }
    // Stack: row i holds curve i, with the d dimensions laid side by side
    // (columns [k*m, (k+1)*m) belong to dimension k).
    let mut centered = FdMatrix::zeros(n, dm);
    for (k, block) in result.aligned_data.iter().enumerate() {
        for i in 0..n {
            for j in 0..m {
                centered[(i, k * m + j)] = block[(i, j)];
            }
        }
    }
    // Center every column at its mean.
    let mut col_mean = vec![0.0; dm];
    for (j, mean) in col_mean.iter_mut().enumerate() {
        for i in 0..n {
            *mean += centered[(i, j)];
        }
        *mean /= n as f64;
    }
    for i in 0..n {
        for (j, &mean) in col_mean.iter().enumerate() {
            centered[(i, j)] -= mean;
        }
    }
    // Upper triangle of X^T X / (n - 1), mirrored into the lower triangle.
    let denom = (n - 1) as f64;
    let mut cov = FdMatrix::zeros(dm, dm);
    for p in 0..dm {
        for q in p..dm {
            let mut acc = 0.0;
            for i in 0..n {
                acc += centered[(i, p)] * centered[(i, q)];
            }
            acc /= denom;
            cov[(p, q)] = acc;
            cov[(q, p)] = acc;
        }
    }
    Ok(cov)
}
/// Functional principal component analysis of the aligned curves in a
/// `KarcherMeanResultNd`.
///
/// Stacks the d aligned (n x m) matrices into an (n, d*m) data matrix,
/// centers its columns, and eigendecomposes the n x n Gram matrix via SVD;
/// scores and per-dimension loadings are recovered from the left singular
/// vectors.
///
/// # Errors
/// Returns `FdarError::InvalidDimension` for empty data, n < 2, m < 2,
/// ncomp < 1, or a grid-length mismatch, and `FdarError::ComputationFailed`
/// when the SVD yields no U factor.
#[must_use = "expensive computation whose result should not be discarded"]
pub fn pca_nd(
    result: &KarcherMeanResultNd,
    argvals: &[f64],
    ncomp: usize,
) -> Result<PcaNdResult, FdarError> {
    let d = result.aligned_data.len();
    if d == 0 {
        return Err(FdarError::InvalidDimension {
            parameter: "aligned_data",
            expected: "d > 0".to_string(),
            actual: "0".to_string(),
        });
    }
    let (n, m) = result.aligned_data[0].shape();
    if n < 2 || m < 2 || ncomp < 1 || argvals.len() != m {
        return Err(FdarError::InvalidDimension {
            parameter: "aligned_data/argvals/ncomp",
            expected: "n >= 2, m >= 2, ncomp >= 1, argvals.len() == m".to_string(),
            actual: format!(
                "n={n}, m={m}, ncomp={ncomp}, argvals.len()={}",
                argvals.len()
            ),
        });
    }
    // A centered n-sample matrix has rank at most n - 1.
    let ncomp = ncomp.min(n - 1);
    let dm = d * m;
    // Stack dimension-major: columns [k*m, (k+1)*m) hold dimension k.
    let mut stacked = FdMatrix::zeros(n, dm);
    for k in 0..d {
        for i in 0..n {
            for j in 0..m {
                stacked[(i, k * m + j)] = result.aligned_data[k][(i, j)];
            }
        }
    }
    // Center each column at its mean.
    let mut col_mean = vec![0.0; dm];
    for j in 0..dm {
        for i in 0..n {
            col_mean[j] += stacked[(i, j)];
        }
        col_mean[j] /= n as f64;
    }
    for i in 0..n {
        for j in 0..dm {
            stacked[(i, j)] -= col_mean[j];
        }
    }
    // Gram trick: X X^T / (n - 1) is only n x n but shares its nonzero
    // eigenvalues with the (d*m x d*m) covariance X^T X / (n - 1).
    let nf = (n - 1) as f64;
    let mut gram = FdMatrix::zeros(n, n);
    for i in 0..n {
        for j in i..n {
            let mut s = 0.0;
            for p in 0..dm {
                s += stacked[(i, p)] * stacked[(j, p)];
            }
            s /= nf;
            gram[(i, j)] = s;
            gram[(j, i)] = s;
        }
    }
    use nalgebra::SVD;
    let svd = SVD::new(gram.to_dmatrix(), true, true);
    let u = svd.u.as_ref().ok_or_else(|| FdarError::ComputationFailed {
        operation: "SVD",
        detail: "SVD failed to compute U matrix for Gram matrix".to_string(),
    })?;
    // For the symmetric PSD Gram matrix the singular values are its
    // eigenvalues; nalgebra returns them in descending order.
    let eigenvalues: Vec<f64> = svd.singular_values.iter().take(ncomp).copied().collect();
    // Score of curve i on component k: u_{ik} * sqrt(lambda_k * (n - 1)).
    let mut scores = FdMatrix::zeros(n, ncomp);
    for k in 0..ncomp {
        let scale = (eigenvalues[k] * nf).sqrt();
        for i in 0..n {
            scores[(i, k)] = u[(i, k)] * scale;
        }
    }
    // Loadings: project the centered data onto u_k, divide by the same
    // scale (floored so null components don't divide by zero), then unstack
    // into one (ncomp x m) matrix per dimension.
    let mut components: Vec<FdMatrix> = (0..d).map(|_| FdMatrix::zeros(ncomp, m)).collect();
    for k in 0..ncomp {
        let scale = (eigenvalues[k] * nf).sqrt().max(1e-15);
        let mut loading = vec![0.0; dm];
        for p in 0..dm {
            let mut s = 0.0;
            for i in 0..n {
                s += stacked[(i, p)] * u[(i, k)];
            }
            loading[p] = s / scale;
        }
        for dim in 0..d {
            for j in 0..m {
                components[dim][(k, j)] = loading[dim * m + j];
            }
        }
    }
    // Variance fractions are taken against ALL eigenvalues, not just the
    // retained ncomp, so cumulative_variance need not reach 1.0.
    let total_var: f64 = svd.singular_values.iter().sum();
    let mut cumulative_variance = Vec::with_capacity(ncomp);
    let mut running = 0.0;
    for ev in &eigenvalues {
        running += ev;
        cumulative_variance.push(if total_var > 0.0 {
            running / total_var
        } else {
            0.0
        });
    }
    let explained_variance = eigenvalues.clone();
    let covariance_eigenvalues = eigenvalues;
    Ok(PcaNdResult {
        scores,
        components,
        explained_variance,
        cumulative_variance,
        covariance_eigenvalues,
    })
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::f64::consts::PI;

    /// n copies of the same 2-D curve (sin, cos) on a uniform grid over [0, 1].
    fn make_identical_curves(n: usize, m: usize) -> (Vec<FdCurveSet>, Vec<f64>) {
        let t: Vec<f64> = (0..m).map(|j| j as f64 / (m - 1) as f64).collect();
        let dim0: Vec<f64> = t.iter().map(|&ti| (2.0 * PI * ti).sin()).collect();
        let dim1: Vec<f64> = t.iter().map(|&ti| (2.0 * PI * ti).cos()).collect();
        let data: Vec<FdCurveSet> = (0..n)
            .map(|_| {
                let m0 = FdMatrix::from_slice(&dim0, 1, m)
                    .expect("dimension invariant: data.len() == n * m");
                let m1 = FdMatrix::from_slice(&dim1, 1, m)
                    .expect("dimension invariant: data.len() == n * m");
                FdCurveSet { dims: vec![m0, m1] }
            })
            .collect();
        (data, t)
    }

    /// n phase-shifted variants of the (sin, cos) curve; shift grows with i.
    fn make_shifted_curves(n: usize, m: usize) -> (Vec<FdCurveSet>, Vec<f64>) {
        let t: Vec<f64> = (0..m).map(|j| j as f64 / (m - 1) as f64).collect();
        let data: Vec<FdCurveSet> = (0..n)
            .map(|i| {
                let shift = 0.05 * (i as f64 - n as f64 / 2.0);
                let dim0: Vec<f64> = t
                    .iter()
                    .map(|&ti| (2.0 * PI * (ti + shift)).sin())
                    .collect();
                let dim1: Vec<f64> = t
                    .iter()
                    .map(|&ti| (2.0 * PI * (ti + shift)).cos())
                    .collect();
                let m0 = FdMatrix::from_slice(&dim0, 1, m)
                    .expect("dimension invariant: data.len() == n * m");
                let m1 = FdMatrix::from_slice(&dim1, 1, m)
                    .expect("dimension invariant: data.len() == n * m");
                FdCurveSet { dims: vec![m0, m1] }
            })
            .collect();
        (data, t)
    }

    // Identical inputs: the mean should track the input and warps stay near
    // the identity.
    #[test]
    fn karcher_mean_nd_identical_curves() {
        let (data, t) = make_identical_curves(5, 31);
        let result = karcher_mean_nd(&data, &t, 10, 1e-4, 0.0).expect("should succeed");
        let d = 2;
        let m = 31;
        let input_dim0: Vec<f64> = t.iter().map(|&ti| (2.0 * PI * ti).sin()).collect();
        let input_dim1: Vec<f64> = t.iter().map(|&ti| (2.0 * PI * ti).cos()).collect();
        let max_diff_0: f64 = result.mean[0]
            .iter()
            .zip(input_dim0.iter())
            .map(|(&a, &b)| (a - b).abs())
            .fold(0.0_f64, f64::max);
        let max_diff_1: f64 = result.mean[1]
            .iter()
            .zip(input_dim1.iter())
            .map(|(&a, &b)| (a - b).abs())
            .fold(0.0_f64, f64::max);
        assert!(
            max_diff_0 < 0.3,
            "Mean dim 0 should be close to input, max diff = {max_diff_0}"
        );
        assert!(
            max_diff_1 < 0.3,
            "Mean dim 1 should be close to input, max diff = {max_diff_1}"
        );
        let n = 5;
        for i in 0..n {
            for j in 0..m {
                let diff = (result.gammas[(i, j)] - t[j]).abs();
                assert!(
                    diff < 0.15,
                    "Warp for identical curves should be near identity: gamma[{i},{j}] diff = {diff}"
                );
            }
        }
        assert_eq!(result.mean.len(), d);
        assert_eq!(result.mean_srsf.len(), d);
        assert_eq!(result.aligned_data.len(), d);
    }

    // Shape/metadata contract of the result struct for a typical input.
    #[test]
    fn karcher_mean_nd_output_dimensions() {
        let (data, t) = make_shifted_curves(8, 25);
        let result = karcher_mean_nd(&data, &t, 5, 1e-3, 0.0).expect("should succeed");
        let n = 8;
        let m = 25;
        let d = 2;
        assert_eq!(result.mean.len(), d);
        assert_eq!(result.mean_srsf.len(), d);
        for k in 0..d {
            assert_eq!(result.mean[k].len(), m);
            assert_eq!(result.mean_srsf[k].len(), m);
        }
        assert_eq!(result.gammas.shape(), (n, m));
        assert_eq!(result.aligned_data.len(), d);
        for k in 0..d {
            assert_eq!(result.aligned_data[k].shape(), (n, m));
        }
        assert!(result.n_iter <= 5);
    }

    // Phase-shifted sinusoids are a benign case; iteration should converge.
    #[test]
    fn karcher_mean_nd_convergence() {
        let (data, t) = make_shifted_curves(10, 31);
        let result = karcher_mean_nd(&data, &t, 20, 1e-3, 0.0).expect("should succeed");
        assert!(
            result.converged,
            "Algorithm should converge for shifted sine curves, n_iter={}",
            result.n_iter
        );
    }

    // Shapes, non-negativity and ordering of the PCA outputs.
    #[test]
    fn pca_nd_basic_properties() {
        let (data, t) = make_shifted_curves(10, 31);
        let km = karcher_mean_nd(&data, &t, 10, 1e-3, 0.0).expect("karcher_mean should succeed");
        let pca = pca_nd(&km, &t, 3).expect("pca_nd should succeed");
        let n = 10;
        let ncomp = 3;
        let m = 31;
        assert_eq!(pca.scores.shape(), (n, ncomp));
        assert_eq!(pca.components.len(), 2);
        for comp in &pca.components {
            assert_eq!(comp.shape(), (ncomp, m));
        }
        for ev in &pca.explained_variance {
            assert!(
                *ev >= -1e-10,
                "Explained variance should be non-negative: {ev}"
            );
        }
        for i in 1..pca.explained_variance.len() {
            assert!(
                pca.explained_variance[i] <= pca.explained_variance[i - 1] + 1e-8,
                "Explained variance should be decreasing: {} > {}",
                pca.explained_variance[i],
                pca.explained_variance[i - 1]
            );
        }
        for i in 1..pca.cumulative_variance.len() {
            assert!(
                pca.cumulative_variance[i] >= pca.cumulative_variance[i - 1] - 1e-10,
                "Cumulative variance should be increasing"
            );
        }
    }

    // The covariance must be square (d*m x d*m) and numerically symmetric.
    #[test]
    fn karcher_covariance_nd_symmetric() {
        let (data, t) = make_shifted_curves(8, 21);
        let km = karcher_mean_nd(&data, &t, 5, 1e-3, 0.0).expect("karcher_mean should succeed");
        let cov = karcher_covariance_nd(&km, &t).expect("covariance should succeed");
        let dm = 2 * 21;
        assert_eq!(cov.shape(), (dm, dm));
        for p in 0..dm {
            for q in p..dm {
                let diff = (cov[(p, q)] - cov[(q, p)]).abs();
                assert!(
                    diff < 1e-12,
                    "Covariance should be symmetric at ({p},{q}): diff = {diff}"
                );
            }
        }
    }
}