leptos_helios/graph_features/
clustering.rs

1//! Graph Clustering System
2//!
3//! This module provides graph clustering algorithms for community detection and analysis.
4
5use super::{GraphEdge, GraphNode};
6
7/// Graph clustering system for community detection and analysis
8#[derive(Debug, Clone)]
9pub struct GraphClusterer {
10    pub clusters: Vec<Vec<GraphNode>>,
11    pub cluster_count: usize,
12    pub algorithm: ClusteringAlgorithm,
13}
14
15/// Available clustering algorithms
16#[derive(Debug, Clone, PartialEq)]
17pub enum ClusteringAlgorithm {
18    KMeans,
19    Hierarchical,
20    CommunityDetection,
21    Spectral,
22}
23
24impl GraphClusterer {
25    /// Create a new graph clusterer
26    pub fn new() -> Self {
27        Self {
28            clusters: Vec::new(),
29            cluster_count: 0,
30            algorithm: ClusteringAlgorithm::KMeans,
31        }
32    }
33
34    /// Perform k-means clustering
35    pub fn k_means_clustering(&mut self, nodes: &[GraphNode], k: usize) -> Vec<Vec<GraphNode>> {
36        if nodes.is_empty() || k == 0 {
37            return Vec::new();
38        }
39
40        // Initialize centroids randomly
41        let mut centroids = Vec::new();
42        for i in 0..k {
43            let node = &nodes[i % nodes.len()];
44            centroids.push((node.x, node.y));
45        }
46
47        let mut clusters = vec![Vec::new(); k];
48        let mut converged = false;
49        let mut iterations = 0;
50        let max_iterations = 100;
51
52        while !converged && iterations < max_iterations {
53            // Assign nodes to clusters
54            clusters = vec![Vec::new(); k];
55            for node in nodes {
56                let mut min_distance = f64::INFINITY;
57                let mut best_cluster = 0;
58
59                for (i, centroid) in centroids.iter().enumerate() {
60                    let distance =
61                        ((node.x - centroid.0).powi(2) + (node.y - centroid.1).powi(2)).sqrt();
62                    if distance < min_distance {
63                        min_distance = distance;
64                        best_cluster = i;
65                    }
66                }
67
68                clusters[best_cluster].push(node.clone());
69            }
70
71            // Update centroids
72            let mut new_centroids = Vec::new();
73            for cluster in &clusters {
74                if cluster.is_empty() {
75                    new_centroids.push((0.0, 0.0));
76                } else {
77                    let avg_x = cluster.iter().map(|n| n.x).sum::<f64>() / cluster.len() as f64;
78                    let avg_y = cluster.iter().map(|n| n.y).sum::<f64>() / cluster.len() as f64;
79                    new_centroids.push((avg_x, avg_y));
80                }
81            }
82
83            // Check for convergence
84            converged = true;
85            for (old, new) in centroids.iter().zip(new_centroids.iter()) {
86                let distance = ((old.0 - new.0).powi(2) + (old.1 - new.1).powi(2)).sqrt();
87                if distance > 0.001 {
88                    converged = false;
89                    break;
90                }
91            }
92
93            centroids = new_centroids;
94            iterations += 1;
95        }
96
97        self.clusters = clusters.clone();
98        self.cluster_count = k;
99        self.algorithm = ClusteringAlgorithm::KMeans;
100        clusters
101    }
102
103    /// Perform hierarchical clustering
104    pub fn hierarchical_clustering(
105        &mut self,
106        nodes: &[GraphNode],
107        k: usize,
108    ) -> Vec<Vec<GraphNode>> {
109        if nodes.is_empty() || k == 0 {
110            return Vec::new();
111        }
112
113        // Start with each node as its own cluster
114        let mut clusters: Vec<Vec<GraphNode>> = nodes.iter().map(|n| vec![n.clone()]).collect();
115
116        // Merge clusters until we have k clusters
117        while clusters.len() > k {
118            let mut min_distance = f64::INFINITY;
119            let mut best_pair = (0, 1);
120
121            // Find the closest pair of clusters
122            for i in 0..clusters.len() {
123                for j in (i + 1)..clusters.len() {
124                    let distance = self.calculate_cluster_distance(&clusters[i], &clusters[j]);
125                    if distance < min_distance {
126                        min_distance = distance;
127                        best_pair = (i, j);
128                    }
129                }
130            }
131
132            // Merge the closest clusters
133            let cluster2 = clusters.remove(best_pair.1);
134            clusters[best_pair.0].extend(cluster2);
135        }
136
137        self.clusters = clusters.clone();
138        self.cluster_count = k;
139        self.algorithm = ClusteringAlgorithm::Hierarchical;
140        clusters
141    }
142
143    /// Detect communities using modularity optimization
144    pub fn detect_communities(
145        &mut self,
146        nodes: &[GraphNode],
147        edges: &[GraphEdge],
148    ) -> Vec<Vec<GraphNode>> {
149        // Start with each node in its own community
150        let mut communities: Vec<Vec<GraphNode>> = nodes.iter().map(|n| vec![n.clone()]).collect();
151        let mut improved = true;
152
153        while improved {
154            improved = false;
155
156            for node in nodes {
157                let mut best_community = 0;
158                let mut best_modularity_gain = 0.0;
159
160                // Try moving the node to each community
161                for (i, _community) in communities.iter().enumerate() {
162                    let modularity_gain =
163                        self.calculate_modularity_gain(node, &communities, edges, i);
164                    if modularity_gain > best_modularity_gain {
165                        best_modularity_gain = modularity_gain;
166                        best_community = i;
167                    }
168                }
169
170                // Move node to best community if it improves modularity
171                if best_modularity_gain > 0.0 {
172                    // Remove from current community
173                    for community in &mut communities {
174                        community.retain(|n| n.id != node.id);
175                    }
176
177                    // Add to best community
178                    communities[best_community].push(node.clone());
179                    improved = true;
180                }
181            }
182        }
183
184        // Remove empty communities
185        communities.retain(|c| !c.is_empty());
186
187        self.clusters = communities.clone();
188        self.cluster_count = communities.len();
189        self.algorithm = ClusteringAlgorithm::CommunityDetection;
190        communities
191    }
192
193    /// Calculate distance between two clusters
194    fn calculate_cluster_distance(&self, cluster1: &[GraphNode], cluster2: &[GraphNode]) -> f64 {
195        let mut total_distance = 0.0;
196        let mut count = 0;
197
198        for node1 in cluster1 {
199            for node2 in cluster2 {
200                let distance = ((node1.x - node2.x).powi(2) + (node1.y - node2.y).powi(2)).sqrt();
201                total_distance += distance;
202                count += 1;
203            }
204        }
205
206        if count > 0 {
207            total_distance / count as f64
208        } else {
209            f64::INFINITY
210        }
211    }
212
213    /// Calculate modularity gain for moving a node to a community
214    fn calculate_modularity_gain(
215        &self,
216        node: &GraphNode,
217        communities: &[Vec<GraphNode>],
218        edges: &[GraphEdge],
219        community_index: usize,
220    ) -> f64 {
221        // Simplified modularity gain calculation
222        let mut gain = 0.0;
223
224        // Count edges within the target community
225        let community = &communities[community_index];
226        let mut internal_edges = 0;
227        let mut total_edges = 0;
228
229        for edge in edges {
230            if edge.source == node.id || edge.target == node.id {
231                total_edges += 1;
232                if community.iter().any(|n| n.id == edge.source)
233                    && community.iter().any(|n| n.id == edge.target)
234                {
235                    internal_edges += 1;
236                }
237            }
238        }
239
240        if total_edges > 0 {
241            gain = (internal_edges as f64 / total_edges as f64) - 0.5;
242        }
243
244        gain
245    }
246
247    /// Calculate silhouette score for clustering quality
248    pub fn calculate_silhouette_score(&self, clusters: &[Vec<GraphNode>]) -> f64 {
249        if clusters.len() < 2 {
250            return 0.0;
251        }
252
253        let mut total_score = 0.0;
254        let mut total_nodes = 0;
255
256        for cluster in clusters {
257            for node in cluster {
258                let mut intra_distance = 0.0;
259                let mut inter_distances = Vec::new();
260
261                // Calculate intra-cluster distance
262                for other in cluster {
263                    if other.id != node.id {
264                        let distance =
265                            ((node.x - other.x).powi(2) + (node.y - other.y).powi(2)).sqrt();
266                        intra_distance += distance;
267                    }
268                }
269                if cluster.len() > 1 {
270                    intra_distance /= (cluster.len() - 1) as f64;
271                }
272
273                // Calculate inter-cluster distances
274                for other_cluster in clusters {
275                    if other_cluster != cluster {
276                        let mut min_inter_distance = f64::INFINITY;
277                        for other in other_cluster {
278                            let distance =
279                                ((node.x - other.x).powi(2) + (node.y - other.y).powi(2)).sqrt();
280                            min_inter_distance = min_inter_distance.min(distance);
281                        }
282                        inter_distances.push(min_inter_distance);
283                    }
284                }
285
286                if !inter_distances.is_empty() {
287                    let min_inter_distance =
288                        inter_distances.iter().fold(f64::INFINITY, |a, &b| a.min(b));
289                    let silhouette = (min_inter_distance - intra_distance)
290                        / min_inter_distance.max(intra_distance);
291                    total_score += silhouette;
292                    total_nodes += 1;
293                }
294            }
295        }
296
297        if total_nodes > 0 {
298            total_score / total_nodes as f64
299        } else {
300            0.0
301        }
302    }
303
304    /// Calculate modularity of the clustering
305    pub fn calculate_modularity(&self, clusters: &[Vec<GraphNode>], edges: &[GraphEdge]) -> f64 {
306        if edges.is_empty() {
307            return 0.0;
308        }
309
310        let total_edges = edges.len() as f64;
311        let mut modularity = 0.0;
312
313        for cluster in clusters {
314            let mut intra_edges = 0;
315            let mut total_degree = 0;
316
317            for node in cluster {
318                for edge in edges {
319                    if edge.source == node.id || edge.target == node.id {
320                        total_degree += 1;
321                        if cluster.iter().any(|n| n.id == edge.source)
322                            && cluster.iter().any(|n| n.id == edge.target)
323                        {
324                            intra_edges += 1;
325                        }
326                    }
327                }
328            }
329
330            let cluster_modularity = (intra_edges as f64 / total_edges)
331                - (total_degree as f64 / (2.0 * total_edges)).powi(2);
332            modularity += cluster_modularity;
333        }
334
335        modularity
336    }
337}