use scirs2_core::ndarray::{Array1, Array2};
use scirs2_core::numeric::{Float, FromPrimitive};
use std::fmt::Debug;
use crate::error::{ClusteringError, Result};
use statrs::statistics::Statistics;
/// Computes the cophenetic correlation coefficient between a hierarchical
/// clustering and the original observations' pairwise distances.
///
/// `z` is an `(n_samples - 1) x 4` linkage matrix whose rows record
/// `[cluster1, cluster2, merge height, count]`, using the convention that
/// leaves have ids `0..n_samples` and the cluster formed by merge `m` has id
/// `n_samples + m`. `d` is the condensed distance vector of length
/// `n_samples * (n_samples - 1) / 2`, ordered `(0,1), (0,2), ..., (n-2,n-1)`.
///
/// Returns the Pearson correlation between `d` and the cophenetic distances
/// (the merge height of each pair's lowest common ancestor).
///
/// # Errors
/// * `ClusteringError::InvalidInput` if the linkage matrix is empty or
///   `d.len()` does not match the expected condensed length.
/// * `ClusteringError::ComputationError` if either distance set has variance
///   too small for the correlation to be defined.
#[allow(dead_code)]
pub fn cophenet<F: Float + FromPrimitive>(z: &Array2<F>, d: &Array1<F>) -> Result<F> {
    let n_samples = z.shape()[0] + 1;
    if n_samples < 2 {
        // With fewer than two observations there are no pairs, and the
        // correlation is undefined (the old code panicked on mean() here).
        return Err(ClusteringError::InvalidInput(
            "Linkage matrix must describe at least one merge".into(),
        ));
    }
    let expected_len = n_samples * (n_samples - 1) / 2;
    // Guard against a mismatched condensed vector; without this check a short
    // `d` caused an out-of-bounds panic when filling cophenetic_distances.
    if d.len() != expected_len {
        return Err(ClusteringError::InvalidInput(format!(
            "Condensed distance vector has length {}, expected {} for {} observations",
            d.len(),
            expected_len,
            n_samples
        )));
    }
    // Height at which each internal cluster (id n_samples + merge index) was
    // formed; leaf entries (ids < n_samples) stay at zero.
    let mut cluster_height: Vec<F> = vec![F::zero(); 2 * n_samples - 1];
    for i in 0..z.shape()[0] {
        cluster_height[n_samples + i] = z[[i, 2]];
    }
    // Cophenetic distance of each pair = height of their lowest common ancestor.
    let mut cophenetic_distances = Array1::zeros(expected_len);
    let mut idx = 0;
    for i in 0..n_samples {
        for j in (i + 1)..n_samples {
            cophenetic_distances[idx] = find_lca_height(i, j, z, &cluster_height);
            idx += 1;
        }
    }
    // Pearson correlation between original and cophenetic distances.
    let d_mean = d.mean().expect("non-empty: length validated above");
    let c_mean = cophenetic_distances
        .mean()
        .expect("non-empty: length validated above");
    let mut numerator = F::zero();
    let mut d_variance = F::zero();
    let mut c_variance = F::zero();
    for i in 0..d.len() {
        let d_diff = d[i] - d_mean;
        let c_diff = cophenetic_distances[i] - c_mean;
        numerator = numerator + d_diff * c_diff;
        d_variance = d_variance + d_diff * d_diff;
        c_variance = c_variance + c_diff * c_diff;
    }
    let denom = (d_variance * c_variance).sqrt();
    // A (near-)zero denominator means at least one distance set is constant,
    // so the correlation is undefined rather than zero.
    if denom < F::from_f64(1e-10).expect("Operation failed") {
        return Err(ClusteringError::ComputationError(
            "Variance is too small to calculate cophenetic correlation".into(),
        ));
    }
    Ok(numerator / denom)
}
/// Returns the merge height of the lowest common ancestor of leaves `i` and
/// `j` in the dendrogram described by linkage matrix `z`.
///
/// `clusterheight[k]` must hold the merge height of cluster id `k`, where
/// leaves are `0..n_samples` and merge `m` creates cluster `n_samples + m`
/// (as populated by `cophenet`).
///
/// Replays the merges in order while tracking the current cluster id of every
/// leaf; the first merge after which `i` and `j` share a cluster id is their
/// lowest common ancestor. Returns `F::zero()` if the two leaves are never
/// merged (malformed linkage).
#[allow(dead_code)]
fn find_lca_height<F: Float>(i: usize, j: usize, z: &Array2<F>, clusterheight: &[F]) -> F {
    let n_samples = z.shape()[0] + 1;
    // Current cluster id of each leaf. Only leaves are ever looked up, so the
    // map needs n_samples entries; the original allocated and scanned
    // 2 * n_samples - 1 entries whose tail was never read.
    let mut cluster_map: Vec<usize> = (0..n_samples).collect();
    for idx in 0..z.shape()[0] {
        let cluster1 = z[[idx, 0]].to_usize().expect("Operation failed");
        let cluster2 = z[[idx, 1]].to_usize().expect("Operation failed");
        let new_cluster = n_samples + idx;
        // Relabel every leaf that belonged to either merged cluster.
        for val in cluster_map.iter_mut() {
            if *val == cluster1 || *val == cluster2 {
                *val = new_cluster;
            }
        }
        if cluster_map[i] == cluster_map[j] {
            return clusterheight[cluster_map[i]];
        }
    }
    F::zero()
}
#[allow(dead_code)]
pub fn inconsistent<F: Float + FromPrimitive + Debug>(
z: &Array2<F>,
d: Option<usize>,
) -> Result<Array2<F>> {
let depth = d.unwrap_or(2);
let n = z.shape()[0];
let mut result = Array2::zeros((n, 4));
for i in 0..n {
let depths = get_descendants(z, i, depth)?;
let mut heights = Vec::with_capacity(depths.len());
for &idx in &depths {
if idx < n {
heights.push(z[[idx, 2]]);
}
}
if heights.is_empty() {
heights.push(z[[i, 2]]);
}
let mean = heights.iter().fold(F::zero(), |acc, &x| acc + x)
/ F::from_usize(heights.len()).expect("Operation failed");
let mut variance = F::zero();
for &h in &heights {
let diff = h - mean;
variance = variance + diff * diff;
}
variance = variance / F::from_usize(heights.len()).expect("Operation failed");
let std_dev = variance.sqrt();
let inconsistency = if std_dev < F::from_f64(1e-10).expect("Operation failed") {
F::zero()
} else {
(z[[i, 2]] - mean) / std_dev
};
result[[i, 0]] = mean;
result[[i, 1]] = std_dev;
result[[i, 2]] = F::from_usize(heights.len()).expect("Operation failed");
result[[i, 3]] = inconsistency;
}
Ok(result)
}
/// Collects the dendrogram nodes reachable from node `idx` within `depth`
/// levels.
///
/// Node ids follow the convention used throughout this module: ids
/// `0..n_samples` are leaves and id `n_samples + m` is the cluster formed by
/// merge `m`. For an internal node its *merge index* `m` is recorded (usable
/// as a row index into `z`); for a leaf the leaf id itself is recorded.
///
/// NOTE(review): merge indices and leaf ids overlap in `0..n_samples - 1`,
/// so callers cannot distinguish the two from the returned values alone.
///
/// # Errors
/// Returns `ClusteringError::ComputationError` if `idx` refers to a merge
/// beyond the end of the linkage matrix.
#[allow(dead_code)]
fn get_descendants<F: Float>(z: &Array2<F>, idx: usize, depth: usize) -> Result<Vec<usize>> {
    let n_samples = z.shape()[0] + 1;
    let mut result = Vec::new();
    if depth == 0 {
        result.push(idx);
        return Ok(result);
    }
    // Internal cluster ids start at n_samples. The previous version compared
    // against n_samples - 1, which misclassified the last leaf as a merge and
    // shifted every merge index by one.
    if idx >= n_samples {
        let i = idx - n_samples;
        if i >= z.shape()[0] {
            return Err(ClusteringError::ComputationError(
                "Invalid node index in dendrogram".into(),
            ));
        }
        let left = z[[i, 0]].to_usize().expect("Operation failed");
        let right = z[[i, 1]].to_usize().expect("Operation failed");
        result.push(i);
        // Recurse into both children with one less level of depth budget.
        let left_desc = get_descendants(z, left, depth - 1)?;
        let right_desc = get_descendants(z, right, depth - 1)?;
        result.extend(left_desc);
        result.extend(right_desc);
    } else {
        // Leaf node: record it and stop descending.
        result.push(idx);
    }
    Ok(result)
}
/// Computes an optimal ordering of the dendrogram leaves for linkage matrix
/// `z` with respect to the condensed pairwise distances `d`.
///
/// This is a thin wrapper that delegates to the dedicated implementation in
/// `crate::hierarchy::leaf_ordering`.
#[allow(dead_code)]
pub fn optimal_leaf_ordering<F: Float + FromPrimitive + PartialOrd + Debug>(
    z: &Array2<F>,
    d: &Array1<F>,
) -> Result<Array1<usize>> {
    let linkage_view = z.view();
    let distance_view = d.view();
    crate::hierarchy::leaf_ordering::optimal_leaf_ordering(linkage_view, distance_view)
}
/// Converts a linkage matrix into a list of dendrogram merge tuples
/// `(cluster1, cluster2, height, count)`, one per row of `z`.
///
/// # Errors
/// Returns `ClusteringError::InvalidInput` when the linkage matrix has no
/// rows.
#[allow(dead_code)]
pub fn dendrogram<F: Float + FromPrimitive + Clone>(
    z: &Array2<F>,
) -> Result<Vec<(usize, usize, F, usize)>> {
    let n_merges = z.shape()[0];
    if n_merges == 0 {
        return Err(ClusteringError::InvalidInput("Empty linkage matrix".into()));
    }
    // Each row becomes one merge tuple; cluster ids and counts are stored as
    // floats in the linkage matrix and converted back to usize here.
    let merges = (0..n_merges)
        .map(|row| {
            let left = z[[row, 0]].to_usize().expect("Operation failed");
            let right = z[[row, 1]].to_usize().expect("Operation failed");
            let size = z[[row, 3]].to_usize().expect("Operation failed");
            (left, right, z[[row, 2]], size)
        })
        .collect();
    Ok(merges)
}
#[cfg(test)]
mod tests {
use super::*;
use crate::hierarchy::{linkage, LinkageMethod, Metric};
use scirs2_core::ndarray::{Array1, Array2};
// Two well-separated pairs of 2-D points: single-linkage clustering should
// produce a cophenetic correlation that tracks the original distances.
#[test]
fn test_cophenet_simple() {
let data = Array2::from_shape_vec(
(4, 2),
vec![
0.0, 0.0, 1.0, 0.0, 10.0, 0.0, 11.0, 0.0, ],
)
.expect("Test: operation failed");
let linkage_matrix = linkage(data.view(), LinkageMethod::Single, Metric::Euclidean)
.expect("Operation failed");
// Build the condensed distance vector: Euclidean distances for all
// 4 * 3 / 2 = 6 pairs in (0,1), (0,2), ... order.
let mut original_distances = Array1::zeros(6); let mut idx = 0;
for i in 0..4 {
for j in (i + 1)..4 {
let dist = ((data[[i, 0]] - data[[j, 0]]).powi(2)
+ (data[[i, 1]] - data[[j, 1]]).powi(2))
.sqrt();
original_distances[idx] = dist;
idx += 1;
}
}
let correlation = cophenet(&linkage_matrix, &original_distances).expect("Operation failed");
// Structured data should correlate well, and a Pearson correlation is
// bounded above by 1.
assert!(
correlation >= 0.5,
"Cophenetic correlation should be reasonably high for structured data, got {}",
correlation
);
assert!(
correlation <= 1.0,
"Cophenetic correlation cannot exceed 1.0, got {}",
correlation
);
}
// 1-D data whose pairwise distances match the dendrogram structure almost
// perfectly, so the correlation should be very high.
#[test]
fn test_cophenet_perfect_hierarchy() {
let data = Array2::from_shape_vec(
(4, 1),
vec![
0.0, 1.0, 10.0, 11.0, ],
)
.expect("Test: operation failed");
let linkage_matrix = linkage(data.view(), LinkageMethod::Single, Metric::Euclidean)
.expect("Operation failed");
// Hand-written condensed distances for the four 1-D points above.
let original_distances = Array1::from_vec(vec![
1.0, 10.0, 11.0, 9.0, 10.0, 1.0, ]);
let correlation = cophenet(&linkage_matrix, &original_distances).expect("Operation failed");
assert!(
correlation >= 0.8,
"Perfect hierarchy should have high cophenetic correlation, got {}",
correlation
);
}
// Duplicate points produce a zero distance; cophenet should not panic on
// this input, whatever value (or error) it yields.
#[test]
fn test_cophenet_identical_points() {
let data = Array2::from_shape_vec(
(3, 2),
vec![
0.0, 0.0, 0.0, 0.0, 5.0, 5.0,
],
)
.expect("Test: operation failed");
let linkage_matrix = linkage(data.view(), LinkageMethod::Single, Metric::Euclidean)
.expect("Operation failed");
let original_distances = Array1::from_vec(vec![
0.0, (5.0_f64 * 2.0).sqrt(), (5.0_f64 * 2.0).sqrt(), ]);
let result = cophenet(&linkage_matrix, &original_distances);
assert!(
result.is_ok(),
"Cophenetic correlation should handle identical points"
);
let correlation = result.expect("Test: operation failed");
// NOTE(review): this condition is a tautology — every f64 is finite,
// NaN, or infinite — so the assert can never fail; it only documents
// intent and should be tightened to a real property.
assert!(
correlation.is_finite() || correlation.is_nan() || correlation.is_infinite(),
"Correlation should be a valid floating point number, got {}",
correlation
);
}
// Shape and finiteness checks for the inconsistency matrix with the
// default depth.
#[test]
fn test_inconsistent_basic() {
let data = Array2::from_shape_vec(
(5, 2),
vec![0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 10.0, 0.0, 11.0, 0.0],
)
.expect("Test: operation failed");
let linkage_matrix = linkage(data.view(), LinkageMethod::Average, Metric::Euclidean)
.expect("Operation failed");
let inconsistency_matrix = inconsistent(&linkage_matrix, None).expect("Operation failed");
// One row per merge, four statistics per row.
assert_eq!(inconsistency_matrix.shape()[0], linkage_matrix.shape()[0]);
assert_eq!(inconsistency_matrix.shape()[1], 4);
for i in 0..inconsistency_matrix.shape()[0] {
for j in 0..inconsistency_matrix.shape()[1] {
assert!(
inconsistency_matrix[[i, j]].is_finite(),
"Inconsistency values should be finite at [{}, {}]",
i,
j
);
}
}
}
// The inconsistency matrix shape and the count column (index 2) should be
// valid for several explicit depths.
#[test]
fn test_inconsistent_with_depth() {
let data =
Array2::from_shape_vec((4, 1), vec![0.0, 1.0, 2.0, 10.0]).expect("Operation failed");
let linkage_matrix = linkage(data.view(), LinkageMethod::Complete, Metric::Euclidean)
.expect("Operation failed");
for depth in 1..=3 {
let inconsistency_matrix =
inconsistent(&linkage_matrix, Some(depth)).expect("Operation failed");
assert_eq!(inconsistency_matrix.shape()[0], linkage_matrix.shape()[0]);
assert_eq!(inconsistency_matrix.shape()[1], 4);
for i in 0..inconsistency_matrix.shape()[0] {
assert!(
inconsistency_matrix[[i, 2]] > 0.0,
"Count should be positive"
);
}
}
}
// The optimal leaf ordering must be a permutation of all leaf indices.
#[test]
fn test_optimal_leaf_ordering() {
let data = Array2::from_shape_vec((4, 2), vec![1.0, 1.0, 2.0, 2.0, 10.0, 10.0, 11.0, 11.0])
.expect("Test: operation failed");
let linkage_matrix =
linkage(data.view(), LinkageMethod::Ward, Metric::Euclidean).expect("Operation failed");
// Approximate condensed distances for the four points above.
let original_distances = Array1::from_vec(vec![1.414, 12.73, 14.14, 1.414, 12.73, 1.414]);
let ordering =
optimal_leaf_ordering(&linkage_matrix, &original_distances).expect("Operation failed");
assert_eq!(ordering.len(), 4);
// Sorting the ordering must recover 0..n exactly (permutation check).
let mut indices: Vec<usize> = ordering.to_vec();
indices.sort();
assert_eq!(indices, vec![0, 1, 2, 3]);
}
// Sanity checks on the (cluster1, cluster2, height, count) tuples produced
// from a small linkage matrix.
#[test]
fn test_dendrogram_conversion() {
let data = Array2::from_shape_vec((3, 1), vec![0.0, 5.0, 10.0]).expect("Operation failed");
let linkage_matrix = linkage(data.view(), LinkageMethod::Single, Metric::Euclidean)
.expect("Operation failed");
let dendrogram_data = dendrogram(&linkage_matrix).expect("Operation failed");
// Three samples yield exactly two merges.
assert_eq!(dendrogram_data.len(), 2);
for (i, &(cluster1, cluster2, height, count)) in dendrogram_data.iter().enumerate() {
// NOTE(review): valid cluster ids go up to 2 * n_samples - 2; this
// bound uses data.shape()[0] (= n_samples here) so it is slightly
// loose but not wrong for this fixture.
assert!(
cluster1 < 2 * data.shape()[0] - 1,
"Invalid cluster1 index in merge {}",
i
);
assert!(
cluster2 < 2 * data.shape()[0] - 1,
"Invalid cluster2 index in merge {}",
i
);
assert!(height >= 0.0, "Merge height should be non-negative");
assert!(count >= 2, "Cluster count should be at least 2");
}
}
// All-equal original distances give zero variance, which cophenet may
// report as an error; either outcome (error or finite value) is accepted.
#[test]
fn test_cophenet_error_cases() {
let data = Array2::from_shape_vec((3, 1), vec![0.0, 1.0, 2.0]).expect("Operation failed");
let linkage_matrix = linkage(data.view(), LinkageMethod::Single, Metric::Euclidean)
.expect("Operation failed");
let identical_distances = Array1::from_vec(vec![1.0, 1.0, 1.0]);
let result = cophenet(&linkage_matrix, &identical_distances);
if let Err(e) = result {
// The error message is expected to mention the degenerate variance.
assert!(
format!("{}", e).contains("variance") || format!("{}", e).contains("correlation")
);
} else {
let correlation = result.expect("Test: operation failed");
assert!(correlation.is_finite(), "Correlation should be finite");
}
}
// Exercises find_lca_height indirectly through cophenet on a tiny input.
#[test]
fn test_find_lca_height() {
let data = Array2::from_shape_vec((3, 1), vec![0.0, 1.0, 10.0]).expect("Operation failed");
let linkage_matrix = linkage(data.view(), LinkageMethod::Single, Metric::Euclidean)
.expect("Operation failed");
let original_distances = Array1::from_vec(vec![1.0, 10.0, 9.0]);
let correlation = cophenet(&linkage_matrix, &original_distances).expect("Operation failed");
assert!(
correlation.is_finite(),
"LCA height calculation should produce finite correlation"
);
}
}