pub mod evaluation;
pub mod external_metrics;
mod internal_metrics;
pub mod density;
pub mod distance;
pub mod validation;
pub use self::density::*;
pub use self::distance::*;
pub use self::evaluation::{dunn_index_enhanced, elbow_method};
pub use self::external_metrics::*;
pub use self::internal_metrics::*;
pub use self::validation::*;
use scirs2_core::ndarray::{Array1, Array2, ArrayBase, Data, Dimension, Ix2};
use scirs2_core::numeric::{Float, NumCast};
use std::collections::HashMap;
use crate::error::{MetricsError, Result};
/// Computes the distance between two points under the named metric.
///
/// `metric` is lower-cased before matching, so the comparison is
/// case-insensitive. Supported names are `"euclidean"`, `"manhattan"` and
/// `"cosine"`.
///
/// For `"cosine"` the result is `1 - cos(a, b)`. The similarity is clamped to
/// `[-1, 1]` before the subtraction so that floating-point rounding cannot
/// yield a slightly negative distance (or one slightly above 2).
///
/// # Errors
///
/// Returns [`MetricsError::InvalidInput`] when:
/// - `a` and `b` have different lengths;
/// - `metric` is `"cosine"` and the squared norm of either point is below
///   `F::epsilon()`;
/// - `metric` is not one of the supported names.
pub(crate) fn calculate_distance<F>(a: &[F], b: &[F], metric: &str) -> Result<F>
where
    F: Float + NumCast + std::fmt::Debug,
{
    if a.len() != b.len() {
        return Err(MetricsError::InvalidInput(format!(
            "Points must have the same dimension, got {} and {}",
            a.len(),
            b.len()
        )));
    }

    match metric.to_lowercase().as_str() {
        "euclidean" => {
            // Left-to-right accumulation of squared differences.
            let sum_sq = a.iter().zip(b.iter()).fold(F::zero(), |acc, (&ai, &bi)| {
                let diff = ai - bi;
                acc + diff * diff
            });
            Ok(sum_sq.sqrt())
        }
        "manhattan" => Ok(a
            .iter()
            .zip(b.iter())
            .fold(F::zero(), |acc, (&ai, &bi)| acc + (ai - bi).abs())),
        "cosine" => {
            let mut dot = F::zero();
            let mut norm_a = F::zero();
            let mut norm_b = F::zero();
            for (&ai, &bi) in a.iter().zip(b.iter()) {
                dot = dot + ai * bi;
                norm_a = norm_a + ai * ai;
                norm_b = norm_b + bi * bi;
            }

            // `norm_a` / `norm_b` hold *squared* norms at this point.
            if norm_a < F::epsilon() || norm_b < F::epsilon() {
                return Err(MetricsError::InvalidInput(
                    "Cannot compute cosine distance for zero vectors".to_string(),
                ));
            }

            let one = F::one();
            let raw_similarity = dot / (norm_a.sqrt() * norm_b.sqrt());
            // Rounding can push the ratio marginally outside [-1, 1] (e.g. for
            // parallel vectors). Plain comparisons are used instead of
            // min/max so that a NaN similarity is passed through unchanged.
            let cosine_similarity = if raw_similarity > one {
                one
            } else if raw_similarity < -one {
                -one
            } else {
                raw_similarity
            };
            Ok(one - cosine_similarity)
        }
        _ => Err(MetricsError::InvalidInput(format!(
            "Unknown distance metric: {metric}"
        ))),
    }
}
/// Builds the symmetric matrix of pairwise distances between the rows of `x`.
///
/// Entry `[i, j]` is `calculate_distance(row_i, row_j, metric)`. Only the
/// upper triangle (including the diagonal) is computed; each off-diagonal
/// value is mirrored into the lower triangle.
///
/// Each row is copied into a contiguous `Vec` once up front, rather than
/// inside the nested loops, so the number of row allocations is linear in the
/// number of samples instead of quadratic.
///
/// # Errors
///
/// Propagates the first error returned by `calculate_distance`. Diagonal
/// entries are computed through the metric as well, so a row the metric
/// rejects fails the whole call. When `x` has no rows the metric is never
/// evaluated and an empty matrix is returned.
pub(crate) fn pairwise_distances<F, S>(x: &ArrayBase<S, Ix2>, metric: &str) -> Result<Array2<F>>
where
    F: Float + NumCast + std::fmt::Debug,
    S: Data<Elem = F>,
{
    let (n_samples, _n_features) = x.dim();

    // Materialize every row exactly once; `to_vec` yields a contiguous copy
    // regardless of the memory layout of `x`.
    let rows: Vec<Vec<F>> = (0..n_samples).map(|i| x.row(i).to_vec()).collect();

    let mut distances = Array2::zeros((n_samples, n_samples));
    for (i, row_i) in rows.iter().enumerate() {
        for (j, row_j) in rows.iter().enumerate().skip(i) {
            let dist = calculate_distance(row_i, row_j, metric)?;
            distances[[i, j]] = dist;
            if i != j {
                distances[[j, i]] = dist;
            }
        }
    }
    Ok(distances)
}
/// Groups sample indices by their label.
///
/// Returns a map from each distinct label to the positions (in
/// `labels.iter()` order) at which that label occurs. Indices within each
/// group are in ascending order because they are appended as encountered.
///
/// `x` is consulted only for its number of rows, which must equal the number
/// of elements in `labels`.
///
/// # Errors
///
/// Returns [`MetricsError::InvalidInput`] when the number of labels differs
/// from the length of the first axis of `x`.
pub(crate) fn group_by_labels<F, T, S1, S2, D>(
    x: &ArrayBase<S1, Ix2>,
    labels: &ArrayBase<S2, D>,
) -> Result<HashMap<T, Vec<usize>>>
where
    F: Float + NumCast + std::fmt::Debug,
    T: std::hash::Hash + std::cmp::Eq + Copy,
    S1: Data<Elem = F>,
    S2: Data<Elem = T>,
    D: Dimension,
{
    let sample_count = x.shape()[0];
    let label_count = labels.len();
    if label_count != sample_count {
        return Err(MetricsError::InvalidInput(format!(
            "Number of labels ({}) does not match number of samples ({})",
            label_count, sample_count
        )));
    }

    // Accumulate member indices per label in a single pass.
    let groups = labels.iter().enumerate().fold(
        HashMap::<T, Vec<usize>>::new(),
        |mut acc, (index, &label)| {
            acc.entry(label).or_default().push(index);
            acc
        },
    );
    Ok(groups)
}