pub struct ClusterAssignment {
pub index: usize,
pub cluster: i64, }
pub fn euclidean_distance_matrix(features: &[Vec<f64>]) -> Vec<Vec<f64>> {
let n = features.len();
let mut dist = vec![vec![0.0; n]; n];
for i in 0..n {
for j in (i + 1)..n {
let d = euclidean_distance(&features[i], &features[j]);
dist[i][j] = d;
dist[j][i] = d;
}
}
dist
}
pub fn haversine_distance_matrix(points: &[(f64, f64)]) -> Vec<Vec<f64>> {
let n = points.len();
let mut dist = vec![vec![0.0; n]; n];
for i in 0..n {
for j in (i + 1)..n {
let d = crate::graph::features::spatial::geodesic_distance(
points[i].0,
points[i].1,
points[j].0,
points[j].1,
);
dist[i][j] = d;
dist[j][i] = d;
}
}
dist
}
fn euclidean_distance(a: &[f64], b: &[f64]) -> f64 {
a.iter()
.zip(b.iter())
.map(|(x, y)| (x - y) * (x - y))
.sum::<f64>()
.sqrt()
}
pub fn normalize_features(features: &mut [Vec<f64>]) {
if features.is_empty() {
return;
}
let dims = features[0].len();
for d in 0..dims {
let mut min = f64::INFINITY;
let mut max = f64::NEG_INFINITY;
for f in features.iter() {
if f[d] < min {
min = f[d];
}
if f[d] > max {
max = f[d];
}
}
let range = max - min;
if range > 0.0 {
for f in features.iter_mut() {
f[d] = (f[d] - min) / range;
}
} else {
for f in features.iter_mut() {
f[d] = 0.0;
}
}
}
}
pub fn dbscan(distances: &[Vec<f64>], eps: f64, min_points: usize) -> Vec<ClusterAssignment> {
let n = distances.len();
let neighbors: Vec<Vec<usize>> = (0..n)
.map(|i| {
(0..n)
.filter(|&j| j != i && distances[i][j] <= eps)
.collect()
})
.collect();
let mut labels: Vec<i64> = vec![-2; n]; let mut cluster_id: i64 = 0;
for i in 0..n {
if labels[i] != -2 {
continue; }
if neighbors[i].len() < min_points {
labels[i] = -1; continue;
}
labels[i] = cluster_id;
let mut queue: Vec<usize> = neighbors[i].clone();
let mut qi = 0;
while qi < queue.len() {
let q = queue[qi];
qi += 1;
if labels[q] == -1 {
labels[q] = cluster_id; }
if labels[q] != -2 {
continue; }
labels[q] = cluster_id;
if neighbors[q].len() >= min_points {
for &nb in &neighbors[q] {
if (labels[nb] == -2 || labels[nb] == -1) && !queue.contains(&nb) {
queue.push(nb);
}
}
}
}
cluster_id += 1;
}
(0..n)
.map(|i| ClusterAssignment {
index: i,
cluster: labels[i],
})
.collect()
}
pub fn kmeans(features: &[Vec<f64>], k: usize, max_iterations: usize) -> Vec<ClusterAssignment> {
let n = features.len();
if n == 0 || k == 0 {
return Vec::new();
}
let k = k.min(n); let dims = features[0].len();
let mut centroids: Vec<Vec<f64>> = Vec::with_capacity(k);
let mut mean = vec![0.0; dims];
for f in features.iter() {
for d in 0..dims {
mean[d] += f[d];
}
}
for m in mean.iter_mut() {
*m /= n as f64;
}
let first = (0..n)
.min_by(|&a, &b| {
euclidean_distance(&features[a], &mean)
.partial_cmp(&euclidean_distance(&features[b], &mean))
.unwrap_or(std::cmp::Ordering::Equal)
})
.unwrap_or(0);
centroids.push(features[first].clone());
for _ in 1..k {
let mut best_idx = 0;
let mut best_dist = f64::NEG_INFINITY;
for (i, feat) in features.iter().enumerate() {
let min_dist = centroids
.iter()
.map(|c| euclidean_distance(feat, c))
.fold(f64::INFINITY, f64::min);
if min_dist > best_dist {
best_dist = min_dist;
best_idx = i;
}
}
centroids.push(features[best_idx].clone());
}
let mut assignments: Vec<usize> = vec![0; n];
for _ in 0..max_iterations {
let mut changed = false;
for i in 0..n {
let nearest = (0..k)
.min_by(|&a, &b| {
euclidean_distance(&features[i], ¢roids[a])
.partial_cmp(&euclidean_distance(&features[i], ¢roids[b]))
.unwrap_or(std::cmp::Ordering::Equal)
})
.unwrap_or(0);
if assignments[i] != nearest {
assignments[i] = nearest;
changed = true;
}
}
if !changed {
break;
}
let mut new_centroids = vec![vec![0.0; dims]; k];
let mut counts = vec![0usize; k];
for i in 0..n {
let c = assignments[i];
counts[c] += 1;
for d in 0..dims {
new_centroids[c][d] += features[i][d];
}
}
for c in 0..k {
if counts[c] > 0 {
for val in new_centroids[c].iter_mut() {
*val /= counts[c] as f64;
}
} else {
new_centroids[c] = centroids[c].clone();
}
}
centroids = new_centroids;
}
(0..n)
.map(|i| ClusterAssignment {
index: i,
cluster: assignments[i] as i64,
})
.collect()
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_dbscan_two_clusters() {
let features = vec![
vec![0.0, 0.0],
vec![1.0, 0.0],
vec![0.0, 1.0],
vec![10.0, 10.0],
vec![11.0, 10.0],
vec![10.0, 11.0],
vec![50.0, 50.0],
];
let dm = euclidean_distance_matrix(&features);
let result = dbscan(&dm, 2.0, 2);
assert_eq!(result[0].cluster, result[1].cluster);
assert_eq!(result[0].cluster, result[2].cluster);
assert_eq!(result[3].cluster, result[4].cluster);
assert_eq!(result[3].cluster, result[5].cluster);
assert_ne!(result[0].cluster, result[3].cluster);
assert_eq!(result[6].cluster, -1);
}
#[test]
fn test_dbscan_all_one_cluster() {
let features = vec![vec![0.0, 0.0], vec![1.0, 0.0], vec![0.5, 0.5]];
let dm = euclidean_distance_matrix(&features);
let result = dbscan(&dm, 2.0, 2);
assert_eq!(result[0].cluster, result[1].cluster);
assert_eq!(result[0].cluster, result[2].cluster);
assert!(result[0].cluster >= 0);
}
#[test]
fn test_dbscan_all_noise() {
let features = vec![vec![0.0, 0.0], vec![100.0, 100.0], vec![200.0, 200.0]];
let dm = euclidean_distance_matrix(&features);
let result = dbscan(&dm, 1.0, 2);
for r in &result {
assert_eq!(r.cluster, -1);
}
}
#[test]
fn test_kmeans_two_clusters() {
let features = vec![
vec![0.0, 0.0],
vec![1.0, 0.0],
vec![0.0, 1.0],
vec![10.0, 10.0],
vec![11.0, 10.0],
vec![10.0, 11.0],
];
let result = kmeans(&features, 2, 100);
assert_eq!(result[0].cluster, result[1].cluster);
assert_eq!(result[0].cluster, result[2].cluster);
assert_eq!(result[3].cluster, result[4].cluster);
assert_eq!(result[3].cluster, result[5].cluster);
assert_ne!(result[0].cluster, result[3].cluster);
}
#[test]
fn test_normalize_features() {
let mut features = vec![vec![0.0, 100.0], vec![10.0, 200.0], vec![5.0, 150.0]];
normalize_features(&mut features);
assert_eq!(features[0], vec![0.0, 0.0]);
assert_eq!(features[1], vec![1.0, 1.0]);
assert_eq!(features[2], vec![0.5, 0.5]);
}
#[test]
fn test_euclidean_distance_matrix() {
let features = vec![vec![0.0, 0.0], vec![3.0, 4.0]];
let dm = euclidean_distance_matrix(&features);
assert!((dm[0][1] - 5.0).abs() < 1e-10);
assert!((dm[1][0] - 5.0).abs() < 1e-10);
assert_eq!(dm[0][0], 0.0);
}
#[test]
fn test_haversine_distance_matrix() {
let points = vec![(59.91, 10.75), (60.39, 5.32)];
let dm = haversine_distance_matrix(&points);
assert!(dm[0][1] > 250_000.0 && dm[0][1] < 350_000.0);
assert_eq!(dm[0][0], 0.0);
}
}