1use scirs2_core::ndarray::{Array1, Array2};
7use scirs2_core::numeric::{Float, FromPrimitive};
8use std::fmt::Debug;
9
10use crate::error::{ClusteringError, Result};
11use statrs::statistics::Statistics;
12
13#[allow(dead_code)]
26pub fn cophenet<F: Float + FromPrimitive>(z: &Array2<F>, d: &Array1<F>) -> Result<F> {
27 let n_samples = z.shape()[0] + 1;
28
29 let mut cophenetic_distances = Array1::zeros(d.len());
31
32 let mut cluster_height: Vec<F> = vec![F::zero(); 2 * n_samples - 1];
34
35 for i in 0..z.shape()[0] {
37 let _cluster1 = z[[i, 0]].to_usize().unwrap();
38 let _cluster2 = z[[i, 1]].to_usize().unwrap();
39 let height = z[[i, 2]];
40 let new_cluster = n_samples + i;
41
42 cluster_height[new_cluster] = height;
44 }
45
46 let mut idx = 0;
48 for i in 0..n_samples {
49 for j in (i + 1)..n_samples {
50 let lca_height = find_lca_height(i, j, z, &cluster_height);
52 cophenetic_distances[idx] = lca_height;
53 idx += 1;
54 }
55 }
56
57 let d_mean = d.mean().unwrap();
59 let c_mean = cophenetic_distances.mean().unwrap();
60
61 let mut numerator = F::zero();
62 let mut d_variance = F::zero();
63 let mut c_variance = F::zero();
64
65 for i in 0..d.len() {
66 let d_diff = d[i] - d_mean;
67 let c_diff = cophenetic_distances[i] - c_mean;
68
69 numerator = numerator + d_diff * c_diff;
70 d_variance = d_variance + d_diff * d_diff;
71 c_variance = c_variance + c_diff * c_diff;
72 }
73
74 let denom = (d_variance * c_variance).sqrt();
75
76 if denom < F::from_f64(1e-10).unwrap() {
77 return Err(ClusteringError::ComputationError(
78 "Variance is too small to calculate cophenetic correlation".into(),
79 ));
80 }
81
82 Ok(numerator / denom)
83}
84
85#[allow(dead_code)]
87fn find_lca_height<F: Float>(i: usize, j: usize, z: &Array2<F>, clusterheight: &[F]) -> F {
88 let n_samples = z.shape()[0] + 1;
89
90 let _i_cluster = i;
93 let _j_cluster = j;
94
95 let mut cluster_map = vec![0; 2 * n_samples - 1];
97 for (idx, val) in cluster_map.iter_mut().enumerate().take(n_samples) {
98 *val = idx;
99 }
100
101 for idx in 0..z.shape()[0] {
102 let cluster1 = z[[idx, 0]].to_usize().unwrap();
103 let cluster2 = z[[idx, 1]].to_usize().unwrap();
104 let new_cluster = n_samples + idx;
105
106 for (k, val) in cluster_map.iter_mut().enumerate() {
108 if k < new_cluster && (*val == cluster1 || *val == cluster2) {
109 *val = new_cluster;
110 }
111 }
112
113 if cluster_map[i] == cluster_map[j] {
115 return clusterheight[cluster_map[i]];
117 }
118 }
119
120 F::zero()
122}
123
124#[allow(dead_code)]
137pub fn inconsistent<F: Float + FromPrimitive + Debug>(
138 z: &Array2<F>,
139 d: Option<usize>,
140) -> Result<Array2<F>> {
141 let depth = d.unwrap_or(2);
142 let n = z.shape()[0];
143
144 let mut result = Array2::zeros((n, 4));
146
147 for i in 0..n {
148 let depths = get_descendants(z, i, depth)?;
150
151 let mut heights = Vec::with_capacity(depths.len());
153 for &idx in &depths {
154 if idx < n {
155 heights.push(z[[idx, 2]]);
156 }
157 }
158
159 if heights.is_empty() {
160 heights.push(z[[i, 2]]);
161 }
162
163 let mean = heights.iter().fold(F::zero(), |acc, &x| acc + x)
165 / F::from_usize(heights.len()).unwrap();
166
167 let mut variance = F::zero();
168 for &h in &heights {
169 let diff = h - mean;
170 variance = variance + diff * diff;
171 }
172 variance = variance / F::from_usize(heights.len()).unwrap();
173 let std_dev = variance.sqrt();
174
175 let inconsistency = if std_dev < F::from_f64(1e-10).unwrap() {
177 F::zero()
178 } else {
179 (z[[i, 2]] - mean) / std_dev
180 };
181
182 result[[i, 0]] = mean;
184 result[[i, 1]] = std_dev;
185 result[[i, 2]] = F::from_usize(heights.len()).unwrap();
186 result[[i, 3]] = inconsistency;
187 }
188
189 Ok(result)
190}
191
192#[allow(dead_code)]
194fn get_descendants<F: Float>(z: &Array2<F>, idx: usize, depth: usize) -> Result<Vec<usize>> {
195 let n_samples = z.shape()[0] + 1;
196 let mut result = Vec::new();
197
198 if depth == 0 {
199 result.push(idx);
200 return Ok(result);
201 }
202
203 if idx >= n_samples - 1 {
205 let i = idx - (n_samples - 1); if i >= z.shape()[0] {
209 return Err(ClusteringError::ComputationError(
210 "Invalid node index in dendrogram".into(),
211 ));
212 }
213
214 let left = z[[i, 0]].to_usize().unwrap();
215 let right = z[[i, 1]].to_usize().unwrap();
216
217 result.push(i);
219
220 let left_desc = get_descendants(z, left, depth - 1)?;
222 let right_desc = get_descendants(z, right, depth - 1)?;
223
224 result.extend(left_desc);
225 result.extend(right_desc);
226 } else {
227 result.push(idx);
229 }
230
231 Ok(result)
232}
233
234#[allow(dead_code)]
248pub fn optimal_leaf_ordering<F: Float + FromPrimitive + PartialOrd + Debug>(
249 z: &Array2<F>,
250 d: &Array1<F>,
251) -> Result<Array1<usize>> {
252 crate::hierarchy::leaf_ordering::optimal_leaf_ordering(z.view(), d.view())
254}
255
256#[allow(dead_code)]
266pub fn dendrogram<F: Float + FromPrimitive + Clone>(
267 z: &Array2<F>,
268) -> Result<Vec<(usize, usize, F, usize)>> {
269 let n = z.shape()[0];
270
271 if n == 0 {
272 return Err(ClusteringError::InvalidInput("Empty linkage matrix".into()));
273 }
274
275 let mut result = Vec::with_capacity(n);
277
278 for i in 0..n {
279 let cluster1 = z[[i, 0]].to_usize().unwrap();
280 let cluster2 = z[[i, 1]].to_usize().unwrap();
281 let height = z[[i, 2]];
282 let count = z[[i, 3]].to_usize().unwrap();
283
284 result.push((cluster1, cluster2, height, count));
285 }
286
287 Ok(result)
288}
289
#[cfg(test)]
mod tests {
    use super::*;
    use crate::hierarchy::{linkage, LinkageMethod, Metric};
    use scirs2_core::ndarray::{Array1, Array2};

    #[test]
    fn test_cophenet_simple() {
        // Two tight pairs of points far apart on the x-axis.
        let data = Array2::from_shape_vec(
            (4, 2),
            vec![0.0, 0.0, 1.0, 0.0, 10.0, 0.0, 11.0, 0.0],
        )
        .unwrap();

        let linkage_matrix =
            linkage(data.view(), LinkageMethod::Single, Metric::Euclidean).unwrap();

        // Condensed Euclidean distance vector for the 4 points.
        let mut original_distances = Array1::zeros(6);
        let mut idx = 0;
        for i in 0..4 {
            for j in (i + 1)..4 {
                let dist = ((data[[i, 0]] - data[[j, 0]]).powi(2)
                    + (data[[i, 1]] - data[[j, 1]]).powi(2))
                .sqrt();
                original_distances[idx] = dist;
                idx += 1;
            }
        }

        let correlation = cophenet(&linkage_matrix, &original_distances).unwrap();

        assert!(
            correlation >= 0.5,
            "Cophenetic correlation should be reasonably high for structured data, got {}",
            correlation
        );
        assert!(
            correlation <= 1.0,
            "Cophenetic correlation cannot exceed 1.0, got {}",
            correlation
        );
    }

    #[test]
    fn test_cophenet_perfect_hierarchy() {
        // 1-D data forming two clean clusters: {0, 1} and {10, 11}.
        let data = Array2::from_shape_vec((4, 1), vec![0.0, 1.0, 10.0, 11.0]).unwrap();

        let linkage_matrix =
            linkage(data.view(), LinkageMethod::Single, Metric::Euclidean).unwrap();

        // Condensed distances for pairs (0,1), (0,2), (0,3), (1,2), (1,3), (2,3).
        let original_distances = Array1::from_vec(vec![1.0, 10.0, 11.0, 9.0, 10.0, 1.0]);

        let correlation = cophenet(&linkage_matrix, &original_distances).unwrap();

        assert!(
            correlation >= 0.8,
            "Perfect hierarchy should have high cophenetic correlation, got {}",
            correlation
        );
    }

    #[test]
    fn test_cophenet_identical_points() {
        // Two coincident points plus one distant point.
        let data =
            Array2::from_shape_vec((3, 2), vec![0.0, 0.0, 0.0, 0.0, 5.0, 5.0]).unwrap();

        let linkage_matrix =
            linkage(data.view(), LinkageMethod::Single, Metric::Euclidean).unwrap();

        let original_distances = Array1::from_vec(vec![
            0.0,
            (5.0_f64 * 2.0).sqrt(),
            (5.0_f64 * 2.0).sqrt(),
        ]);

        let result = cophenet(&linkage_matrix, &original_distances);
        assert!(
            result.is_ok(),
            "Cophenetic correlation should handle identical points"
        );

        // A successful computation implies a well-defined value.
        let correlation = result.unwrap();
        assert!(
            correlation.is_finite(),
            "Correlation should be a finite floating point number, got {}",
            correlation
        );
    }

    #[test]
    fn test_inconsistent_basic() {
        let data = Array2::from_shape_vec(
            (5, 2),
            vec![0.0, 0.0, 1.0, 0.0, 2.0, 0.0, 10.0, 0.0, 11.0, 0.0],
        )
        .unwrap();

        let linkage_matrix =
            linkage(data.view(), LinkageMethod::Average, Metric::Euclidean).unwrap();

        let inconsistency_matrix = inconsistent(&linkage_matrix, None).unwrap();

        // One row per merge, four statistics per row.
        assert_eq!(inconsistency_matrix.shape()[0], linkage_matrix.shape()[0]);
        assert_eq!(inconsistency_matrix.shape()[1], 4);

        for i in 0..inconsistency_matrix.shape()[0] {
            for j in 0..inconsistency_matrix.shape()[1] {
                assert!(
                    inconsistency_matrix[[i, j]].is_finite(),
                    "Inconsistency values should be finite at [{}, {}]",
                    i,
                    j
                );
            }
        }
    }

    #[test]
    fn test_inconsistent_with_depth() {
        let data = Array2::from_shape_vec((4, 1), vec![0.0, 1.0, 2.0, 10.0]).unwrap();

        let linkage_matrix =
            linkage(data.view(), LinkageMethod::Complete, Metric::Euclidean).unwrap();

        // The output shape and the link counts must hold at every depth.
        for depth in 1..=3 {
            let inconsistency_matrix = inconsistent(&linkage_matrix, Some(depth)).unwrap();

            assert_eq!(inconsistency_matrix.shape()[0], linkage_matrix.shape()[0]);
            assert_eq!(inconsistency_matrix.shape()[1], 4);

            for i in 0..inconsistency_matrix.shape()[0] {
                assert!(
                    inconsistency_matrix[[i, 2]] > 0.0,
                    "Count should be positive"
                );
            }
        }
    }

    #[test]
    fn test_optimal_leaf_ordering() {
        let data = Array2::from_shape_vec((4, 2), vec![1.0, 1.0, 2.0, 2.0, 10.0, 10.0, 11.0, 11.0])
            .unwrap();

        let linkage_matrix = linkage(data.view(), LinkageMethod::Ward, Metric::Euclidean).unwrap();
        let original_distances = Array1::from_vec(vec![1.414, 12.73, 14.14, 1.414, 12.73, 1.414]);

        let ordering = optimal_leaf_ordering(&linkage_matrix, &original_distances).unwrap();

        assert_eq!(ordering.len(), 4);

        // The ordering must be a permutation of all leaf indices.
        let mut indices: Vec<usize> = ordering.to_vec();
        indices.sort();
        assert_eq!(indices, vec![0, 1, 2, 3]);
    }

    #[test]
    fn test_dendrogram_conversion() {
        let data = Array2::from_shape_vec((3, 1), vec![0.0, 5.0, 10.0]).unwrap();

        let linkage_matrix =
            linkage(data.view(), LinkageMethod::Single, Metric::Euclidean).unwrap();

        let dendrogram_data = dendrogram(&linkage_matrix).unwrap();

        // n observations produce n - 1 merges.
        assert_eq!(dendrogram_data.len(), 2);

        for (i, &(cluster1, cluster2, height, count)) in dendrogram_data.iter().enumerate() {
            assert!(
                cluster1 < 2 * data.shape()[0] - 1,
                "Invalid cluster1 index in merge {}",
                i
            );
            assert!(
                cluster2 < 2 * data.shape()[0] - 1,
                "Invalid cluster2 index in merge {}",
                i
            );
            assert!(height >= 0.0, "Merge height should be non-negative");
            assert!(count >= 2, "Cluster count should be at least 2");
        }
    }

    #[test]
    fn test_cophenet_error_cases() {
        let data = Array2::from_shape_vec((3, 1), vec![0.0, 1.0, 2.0]).unwrap();
        let linkage_matrix =
            linkage(data.view(), LinkageMethod::Single, Metric::Euclidean).unwrap();

        // Zero variance in the input distances must either error or yield a
        // finite value — never panic or return NaN silently.
        let identical_distances = Array1::from_vec(vec![1.0, 1.0, 1.0]);
        let result = cophenet(&linkage_matrix, &identical_distances);

        if let Err(e) = result {
            assert!(
                format!("{}", e).contains("variance") || format!("{}", e).contains("correlation")
            );
        } else {
            let correlation = result.unwrap();
            assert!(correlation.is_finite(), "Correlation should be finite");
        }
    }

    #[test]
    fn test_find_lca_height() {
        let data = Array2::from_shape_vec((3, 1), vec![0.0, 1.0, 10.0]).unwrap();

        let linkage_matrix =
            linkage(data.view(), LinkageMethod::Single, Metric::Euclidean).unwrap();
        let original_distances = Array1::from_vec(vec![1.0, 10.0, 9.0]);

        let correlation = cophenet(&linkage_matrix, &original_distances).unwrap();
        assert!(
            correlation.is_finite(),
            "LCA height calculation should produce finite correlation"
        );
    }
}