fn main() {
use clump::cluster::metrics::{
calinski_harabasz, davies_bouldin, disco_score, silhouette_score,
};
use clump::{Dbscan, Euclidean, Kmeans, NOISE};
let mut data = Vec::new();
for i in 0..40 {
let v = i as f32 * 0.05;
data.push(vec![v, v]);
}
for i in 0..40 {
let v = 20.0 + i as f32 * 0.05;
data.push(vec![v, v]);
}
for i in 0..40 {
let v = 40.0 + i as f32 * 0.05;
data.push(vec![v, -v + 40.0]);
}
data.push(vec![100.0, 100.0]);
let n = data.len();
println!("Data: {n} points (3 clusters of 40 + 1 outlier)\n");
println!(
"{:>3} {:>10} {:>11} {:>12} {:>13}",
"k", "WCSS", "Silhouette", "Calinski-Hab", "Davies-Bould"
);
println!("{}", "-".repeat(56));
for k in 2..=6 {
let fit = Kmeans::new(k).with_seed(42).fit(&data).unwrap();
let wcss = fit.wcss(&data);
let sil = silhouette_score(&data, &fit.labels, &Euclidean);
let ch = calinski_harabasz(&data, &fit.labels, &fit.centroids);
let db = davies_bouldin(&data, &fit.labels, &fit.centroids, &Euclidean);
println!("{k:>3} {wcss:>10.1} {sil:>11.4} {ch:>12.1} {db:>13.4}");
}
println!("\nBest k: highest silhouette, highest Calinski-Harabasz, lowest Davies-Bouldin.");
println!("WCSS decreases monotonically; the elbow is where the rate of decrease slows.\n");
println!("--- DBSCAN + DISCO ---");
let labels = Dbscan::new(3.0, 3).fit_predict(&data).unwrap();
let n_noise = labels.iter().filter(|&&l| l == NOISE).count();
let n_clusters = labels
.iter()
.filter(|&&l| l != NOISE)
.copied()
.max()
.map_or(0, |m| m + 1);
println!("DBSCAN (eps=3.0, min_pts=3): {n_clusters} clusters, {n_noise} noise points");
let disco = disco_score(&data, &labels, &Euclidean, 3);
println!("DISCO score: {disco:.4}");
println!("DISCO evaluates both cluster quality and noise assignment correctness.");
}