pub mod distance_based;
pub mod gap_statistic;
pub mod information_theoretic;
pub mod set_based;
pub mod utils;
pub use distance_based::{
calinski_harabasz_score, davies_bouldin_score, dunn_index, silhouette_score, xie_beni_index,
};
pub use information_theoretic::{
adjusted_mutual_info_score, completeness_score, homogeneity_score,
normalized_mutual_info_score, v_measure_score,
};
pub use set_based::{adjusted_rand_score, fowlkes_mallows_score};
pub use utils::{combinations, compute_entropy};
pub use gap_statistic::{GapStatistic, GapStatisticConfig, GapStatisticResult};
#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_relative_eq;
    use torsh_tensor::Tensor;

    /// Smoke test: run every exported metric on a tiny two-cluster dataset
    /// where prediction matches ground truth exactly, so each external
    /// metric should hit its ideal value of 1.0.
    #[test]
    fn test_all_metrics_integration() -> Result<(), Box<dyn std::error::Error>> {
        // Two tight clusters far apart: points near (0, 0) and near (5, 5).
        let data = Tensor::from_vec(
            vec![0.0, 0.0, 0.1, 0.1, 5.0, 5.0, 5.1, 5.1],
            &[4, 2],
        )?;
        let labels_true = Tensor::from_vec(vec![0.0, 0.0, 1.0, 1.0], &[4])?;
        let labels_pred = Tensor::from_vec(vec![0.0, 0.0, 1.0, 1.0], &[4])?;

        // Internal (geometry-based) metrics on the raw data.
        assert!(
            silhouette_score(&data, &labels_pred)? > 0.5,
            "Silhouette score should be high for well-separated clusters"
        );
        assert!(
            calinski_harabasz_score(&data, &labels_pred)? > 1.0,
            "CH score should be > 1 for well-separated clusters"
        );
        assert!(
            davies_bouldin_score(&data, &labels_pred)? >= 0.0,
            "DB score should be non-negative"
        );

        // External (label-agreement) metrics: identical labelings score 1.0.
        let external_scores = [
            normalized_mutual_info_score(&labels_true, &labels_pred)?,
            adjusted_mutual_info_score(&labels_true, &labels_pred)?,
            homogeneity_score(&labels_true, &labels_pred)?,
            completeness_score(&labels_true, &labels_pred)?,
            v_measure_score(&labels_true, &labels_pred)?,
            adjusted_rand_score(&labels_true, &labels_pred)?,
            fowlkes_mallows_score(&labels_true, &labels_pred)?,
        ];
        for score in external_scores {
            assert_relative_eq!(score, 1.0, epsilon = 1e-6);
        }
        Ok(())
    }

    /// Consistency test on three well-separated clusters: all external
    /// metrics must be perfect for an exact labeling, and the internal
    /// metrics must stay within their documented ranges.
    #[test]
    fn test_metrics_consistency() -> Result<(), Box<dyn std::error::Error>> {
        // Three pairs of near-duplicate points around (1,1), (5,5), (9,9).
        let data = Tensor::from_vec(
            vec![1.0, 1.0, 1.1, 1.1, 5.0, 5.0, 5.1, 5.1, 9.0, 9.0, 9.1, 9.1],
            &[6, 2],
        )?;
        let labels_true = Tensor::from_vec(vec![0.0, 0.0, 1.0, 1.0, 2.0, 2.0], &[6])?;
        let labels_pred = Tensor::from_vec(vec![0.0, 0.0, 1.0, 1.0, 2.0, 2.0], &[6])?;

        // Every external metric should report a perfect match.
        let perfect_scores = [
            normalized_mutual_info_score(&labels_true, &labels_pred)?,
            adjusted_mutual_info_score(&labels_true, &labels_pred)?,
            homogeneity_score(&labels_true, &labels_pred)?,
            completeness_score(&labels_true, &labels_pred)?,
            v_measure_score(&labels_true, &labels_pred)?,
            adjusted_rand_score(&labels_true, &labels_pred)?,
            fowlkes_mallows_score(&labels_true, &labels_pred)?,
        ];
        for score in perfect_scores {
            assert_relative_eq!(score, 1.0, epsilon = 1e-6);
        }

        // Internal metrics: silhouette in [0, 1] here, CH positive, DB non-negative.
        let silhouette = silhouette_score(&data, &labels_pred)?;
        assert!((0.0..=1.0).contains(&silhouette));
        assert!(calinski_harabasz_score(&data, &labels_pred)? > 0.0);
        assert!(davies_bouldin_score(&data, &labels_pred)? >= 0.0);
        Ok(())
    }
}