scirs2_metrics/clustering/
mod.rs1pub mod evaluation;
25pub mod external_metrics;
26mod internal_metrics;
27
28pub mod density;
30pub mod distance;
31pub mod validation;
32
33pub use self::density::*;
35pub use self::distance::*;
36pub use self::evaluation::{dunn_index_enhanced, elbow_method};
37pub use self::external_metrics::*;
38pub use self::internal_metrics::*;
39pub use self::validation::*;
40
41use scirs2_core::ndarray::{Array1, Array2, ArrayBase, Data, Dimension, Ix2};
43use scirs2_core::numeric::{Float, NumCast};
44use std::collections::HashMap;
45
46use crate::error::{MetricsError, Result};
47
48pub(crate) fn calculate_distance<F>(a: &[F], b: &[F], metric: &str) -> Result<F>
60where
61 F: Float + NumCast + std::fmt::Debug,
62{
63 if a.len() != b.len() {
64 return Err(MetricsError::InvalidInput(format!(
65 "Points must have the same dimension, got {} and {}",
66 a.len(),
67 b.len()
68 )));
69 }
70
71 match metric.to_lowercase().as_str() {
72 "euclidean" => {
73 let mut sum = F::zero();
74 for (ai, bi) in a.iter().zip(b.iter()) {
75 let diff = *ai - *bi;
76 sum = sum + diff * diff;
77 }
78 Ok(sum.sqrt())
79 }
80 "manhattan" => {
81 let mut sum = F::zero();
82 for (ai, bi) in a.iter().zip(b.iter()) {
83 sum = sum + (*ai - *bi).abs();
84 }
85 Ok(sum)
86 }
87 "cosine" => {
88 let mut dot = F::zero();
89 let mut norm_a = F::zero();
90 let mut norm_b = F::zero();
91
92 for (ai, bi) in a.iter().zip(b.iter()) {
93 dot = dot + *ai * *bi;
94 norm_a = norm_a + *ai * *ai;
95 norm_b = norm_b + *bi * *bi;
96 }
97
98 if norm_a < F::epsilon() || norm_b < F::epsilon() {
99 return Err(MetricsError::InvalidInput(
100 "Cannot compute cosine distance for zero vectors".to_string(),
101 ));
102 }
103
104 let cosine_similarity = dot / (norm_a.sqrt() * norm_b.sqrt());
105
106 Ok(F::one() - cosine_similarity)
108 }
109 _ => Err(MetricsError::InvalidInput(format!(
110 "Unknown distance metric: {metric}"
111 ))),
112 }
113}
114
115pub(crate) fn pairwise_distances<F, S>(x: &ArrayBase<S, Ix2>, metric: &str) -> Result<Array2<F>>
126where
127 F: Float + NumCast + std::fmt::Debug,
128 S: Data<Elem = F>,
129{
130 let (n_samples, _n_features) = x.dim();
131 let mut distances = Array2::zeros((n_samples, n_samples));
132
133 for i in 0..n_samples {
134 for j in i..n_samples {
135 let row_i = x.row(i).to_vec();
136 let row_j = x.row(j).to_vec();
137
138 let dist = calculate_distance(&row_i, &row_j, metric)?;
139
140 distances[[i, j]] = dist;
141 if i != j {
142 distances[[j, i]] = dist; }
144 }
145 }
146
147 Ok(distances)
148}
149
150pub(crate) fn group_by_labels<F, T, S1, S2, D>(
161 x: &ArrayBase<S1, Ix2>,
162 labels: &ArrayBase<S2, D>,
163) -> Result<HashMap<T, Vec<usize>>>
164where
165 F: Float + NumCast + std::fmt::Debug,
166 T: std::hash::Hash + std::cmp::Eq + Copy,
167 S1: Data<Elem = F>,
168 S2: Data<Elem = T>,
169 D: Dimension,
170{
171 let n_samples = x.shape()[0];
172
173 if labels.len() != n_samples {
174 return Err(MetricsError::InvalidInput(format!(
175 "Number of labels ({}) does not match number of samples ({})",
176 labels.len(),
177 n_samples
178 )));
179 }
180
181 let mut clusters: HashMap<T, Vec<usize>> = HashMap::new();
182
183 for (i, &label) in labels.iter().enumerate() {
184 clusters.entry(label).or_default().push(i);
185 }
186
187 Ok(clusters)
188}