use crate::distance::euclidean_sq;
/// Returns the mean silhouette coefficient over all samples.
///
/// The per-sample coefficients come from [`silhouette_samples`]; this function
/// only averages them. When there are no samples the result is `0.0` rather
/// than the `NaN` a `0 / 0` division would produce.
///
/// # Panics
///
/// Panics if `features` and `labels` differ in length (asserted by
/// [`silhouette_samples`]).
pub fn silhouette_score(features: &[Vec<f64>], labels: &[usize]) -> f64 {
    let per_sample = silhouette_samples(features, labels);
    match per_sample.len() {
        0 => 0.0,
        count => {
            let total: f64 = per_sample.iter().sum();
            total / count as f64
        }
    }
}
/// Computes the silhouette coefficient of every sample.
///
/// For sample `i`, let `a` be its mean distance to the *other* members of its
/// own cluster and `b` the smallest mean distance to the members of any other
/// cluster. The coefficient is `(b - a) / max(a, b)`. Distances are the square
/// root of `euclidean_sq`.
///
/// The returned vector has one entry per sample, in input order. An entry is
/// `0.0` when:
///
/// * there are fewer than two samples or fewer than two distinct labels,
/// * the sample is the only member of its cluster (the conventional value for
///   singletons; without this rule a singleton would score a misleading `1.0`),
/// * no finite nearest-cluster distance exists, or
/// * `max(a, b)` is below `1e-15` (all relevant points coincide).
///
/// Label values may be arbitrary `usize` identifiers; they need not be
/// contiguous or start at zero.
///
/// # Panics
///
/// Panics if `features` and `labels` differ in length.
pub fn silhouette_samples(features: &[Vec<f64>], labels: &[usize]) -> Vec<f64> {
    assert_eq!(
        features.len(),
        labels.len(),
        "features and labels must have the same length"
    );
    let n = features.len();
    if n <= 1 {
        return vec![0.0; n];
    }

    // Remap labels onto 0..n_clusters so buffer sizes depend on the number of
    // distinct clusters, not on the largest label value (which could be huge,
    // or overflow when incremented).
    let mut distinct = labels.to_vec();
    distinct.sort_unstable();
    distinct.dedup();
    let n_clusters = distinct.len();
    if n_clusters <= 1 {
        return vec![0.0; n];
    }
    let dense: Vec<usize> = labels
        .iter()
        .map(|label| distinct.partition_point(|known| known < label))
        .collect();

    // Every dense cluster has at least one member by construction.
    let mut cluster_size = vec![0usize; n_clusters];
    for &cluster in &dense {
        cluster_size[cluster] += 1;
    }

    // Reused for each sample instead of reallocating inside the loop.
    let mut dist_sum = vec![0.0_f64; n_clusters];
    let mut scores = Vec::with_capacity(n);

    for (i, (point, &own)) in features.iter().zip(&dense).enumerate() {
        // A lone sample has no intra-cluster distance; its silhouette is 0.
        if cluster_size[own] == 1 {
            scores.push(0.0);
            continue;
        }

        dist_sum.fill(0.0);
        for (j, (other, &cluster)) in features.iter().zip(&dense).enumerate() {
            if i != j {
                dist_sum[cluster] += euclidean_sq(point, other).sqrt();
            }
        }

        // The sample itself is excluded from its own cluster's mean.
        let a = dist_sum[own] / (cluster_size[own] - 1) as f64;

        // Nearest other cluster by mean distance; NaN means are ignored.
        let b = (0..n_clusters)
            .filter(|&cluster| cluster != own)
            .map(|cluster| dist_sum[cluster] / cluster_size[cluster] as f64)
            .fold(f64::INFINITY, f64::min);

        let scale = a.max(b);
        let score = if b == f64::INFINITY || scale < 1e-15 {
            0.0
        } else {
            (b - a) / scale
        };
        scores.push(score);
    }
    scores
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Two tight three-point groups placed far apart should score near 1.
    #[test]
    fn test_silhouette_perfect_clusters() {
        // The same small triangle, once at the origin and once at (100, 100).
        let features: Vec<Vec<f64>> = [0.0_f64, 100.0]
            .iter()
            .flat_map(|&offset| {
                vec![
                    vec![offset, offset],
                    vec![offset + 1.0, offset],
                    vec![offset + 0.5, offset + 0.5],
                ]
            })
            .collect();
        let labels = [0usize, 0, 0, 1, 1, 1];

        let score = silhouette_score(&features, &labels);

        assert!(
            score > 0.90,
            "well-separated clusters should have silhouette > 0.90, got {score:.4}"
        );
    }

    /// With only one cluster there is no neighbouring cluster, so the score is 0.
    #[test]
    fn test_silhouette_single_cluster() {
        let features: Vec<Vec<f64>> = [1.0, 2.0, 3.0].iter().map(|&x| vec![x]).collect();
        let labels = [0usize; 3];

        let score = silhouette_score(&features, &labels);

        assert!(
            score.abs() < 1e-10,
            "single cluster should have silhouette 0, got {score}"
        );
    }

    /// One coefficient per sample, each within the closed interval [-1, 1].
    #[test]
    fn test_silhouette_samples_length() {
        let features: Vec<Vec<f64>> = [0.0, 10.0, 0.5, 10.5].iter().map(|&x| vec![x]).collect();
        let labels = [0usize, 1, 0, 1];

        let scores = silhouette_samples(&features, &labels);

        assert_eq!(scores.len(), 4);
        for s in scores.iter().copied() {
            assert!(
                (-1.0..=1.0).contains(&s),
                "silhouette must be in [-1, 1], got {s}"
            );
        }
    }
}