leptos_helios/graph_features/
clustering.rs1use super::{GraphEdge, GraphNode};
6
7#[derive(Debug, Clone)]
9pub struct GraphClusterer {
10 pub clusters: Vec<Vec<GraphNode>>,
11 pub cluster_count: usize,
12 pub algorithm: ClusteringAlgorithm,
13}
14
15#[derive(Debug, Clone, PartialEq)]
17pub enum ClusteringAlgorithm {
18 KMeans,
19 Hierarchical,
20 CommunityDetection,
21 Spectral,
22}
23
24impl GraphClusterer {
25 pub fn new() -> Self {
27 Self {
28 clusters: Vec::new(),
29 cluster_count: 0,
30 algorithm: ClusteringAlgorithm::KMeans,
31 }
32 }
33
34 pub fn k_means_clustering(&mut self, nodes: &[GraphNode], k: usize) -> Vec<Vec<GraphNode>> {
36 if nodes.is_empty() || k == 0 {
37 return Vec::new();
38 }
39
40 let mut centroids = Vec::new();
42 for i in 0..k {
43 let node = &nodes[i % nodes.len()];
44 centroids.push((node.x, node.y));
45 }
46
47 let mut clusters = vec![Vec::new(); k];
48 let mut converged = false;
49 let mut iterations = 0;
50 let max_iterations = 100;
51
52 while !converged && iterations < max_iterations {
53 clusters = vec![Vec::new(); k];
55 for node in nodes {
56 let mut min_distance = f64::INFINITY;
57 let mut best_cluster = 0;
58
59 for (i, centroid) in centroids.iter().enumerate() {
60 let distance =
61 ((node.x - centroid.0).powi(2) + (node.y - centroid.1).powi(2)).sqrt();
62 if distance < min_distance {
63 min_distance = distance;
64 best_cluster = i;
65 }
66 }
67
68 clusters[best_cluster].push(node.clone());
69 }
70
71 let mut new_centroids = Vec::new();
73 for cluster in &clusters {
74 if cluster.is_empty() {
75 new_centroids.push((0.0, 0.0));
76 } else {
77 let avg_x = cluster.iter().map(|n| n.x).sum::<f64>() / cluster.len() as f64;
78 let avg_y = cluster.iter().map(|n| n.y).sum::<f64>() / cluster.len() as f64;
79 new_centroids.push((avg_x, avg_y));
80 }
81 }
82
83 converged = true;
85 for (old, new) in centroids.iter().zip(new_centroids.iter()) {
86 let distance = ((old.0 - new.0).powi(2) + (old.1 - new.1).powi(2)).sqrt();
87 if distance > 0.001 {
88 converged = false;
89 break;
90 }
91 }
92
93 centroids = new_centroids;
94 iterations += 1;
95 }
96
97 self.clusters = clusters.clone();
98 self.cluster_count = k;
99 self.algorithm = ClusteringAlgorithm::KMeans;
100 clusters
101 }
102
103 pub fn hierarchical_clustering(
105 &mut self,
106 nodes: &[GraphNode],
107 k: usize,
108 ) -> Vec<Vec<GraphNode>> {
109 if nodes.is_empty() || k == 0 {
110 return Vec::new();
111 }
112
113 let mut clusters: Vec<Vec<GraphNode>> = nodes.iter().map(|n| vec![n.clone()]).collect();
115
116 while clusters.len() > k {
118 let mut min_distance = f64::INFINITY;
119 let mut best_pair = (0, 1);
120
121 for i in 0..clusters.len() {
123 for j in (i + 1)..clusters.len() {
124 let distance = self.calculate_cluster_distance(&clusters[i], &clusters[j]);
125 if distance < min_distance {
126 min_distance = distance;
127 best_pair = (i, j);
128 }
129 }
130 }
131
132 let cluster2 = clusters.remove(best_pair.1);
134 clusters[best_pair.0].extend(cluster2);
135 }
136
137 self.clusters = clusters.clone();
138 self.cluster_count = k;
139 self.algorithm = ClusteringAlgorithm::Hierarchical;
140 clusters
141 }
142
143 pub fn detect_communities(
145 &mut self,
146 nodes: &[GraphNode],
147 edges: &[GraphEdge],
148 ) -> Vec<Vec<GraphNode>> {
149 let mut communities: Vec<Vec<GraphNode>> = nodes.iter().map(|n| vec![n.clone()]).collect();
151 let mut improved = true;
152
153 while improved {
154 improved = false;
155
156 for node in nodes {
157 let mut best_community = 0;
158 let mut best_modularity_gain = 0.0;
159
160 for (i, _community) in communities.iter().enumerate() {
162 let modularity_gain =
163 self.calculate_modularity_gain(node, &communities, edges, i);
164 if modularity_gain > best_modularity_gain {
165 best_modularity_gain = modularity_gain;
166 best_community = i;
167 }
168 }
169
170 if best_modularity_gain > 0.0 {
172 for community in &mut communities {
174 community.retain(|n| n.id != node.id);
175 }
176
177 communities[best_community].push(node.clone());
179 improved = true;
180 }
181 }
182 }
183
184 communities.retain(|c| !c.is_empty());
186
187 self.clusters = communities.clone();
188 self.cluster_count = communities.len();
189 self.algorithm = ClusteringAlgorithm::CommunityDetection;
190 communities
191 }
192
193 fn calculate_cluster_distance(&self, cluster1: &[GraphNode], cluster2: &[GraphNode]) -> f64 {
195 let mut total_distance = 0.0;
196 let mut count = 0;
197
198 for node1 in cluster1 {
199 for node2 in cluster2 {
200 let distance = ((node1.x - node2.x).powi(2) + (node1.y - node2.y).powi(2)).sqrt();
201 total_distance += distance;
202 count += 1;
203 }
204 }
205
206 if count > 0 {
207 total_distance / count as f64
208 } else {
209 f64::INFINITY
210 }
211 }
212
213 fn calculate_modularity_gain(
215 &self,
216 node: &GraphNode,
217 communities: &[Vec<GraphNode>],
218 edges: &[GraphEdge],
219 community_index: usize,
220 ) -> f64 {
221 let mut gain = 0.0;
223
224 let community = &communities[community_index];
226 let mut internal_edges = 0;
227 let mut total_edges = 0;
228
229 for edge in edges {
230 if edge.source == node.id || edge.target == node.id {
231 total_edges += 1;
232 if community.iter().any(|n| n.id == edge.source)
233 && community.iter().any(|n| n.id == edge.target)
234 {
235 internal_edges += 1;
236 }
237 }
238 }
239
240 if total_edges > 0 {
241 gain = (internal_edges as f64 / total_edges as f64) - 0.5;
242 }
243
244 gain
245 }
246
247 pub fn calculate_silhouette_score(&self, clusters: &[Vec<GraphNode>]) -> f64 {
249 if clusters.len() < 2 {
250 return 0.0;
251 }
252
253 let mut total_score = 0.0;
254 let mut total_nodes = 0;
255
256 for cluster in clusters {
257 for node in cluster {
258 let mut intra_distance = 0.0;
259 let mut inter_distances = Vec::new();
260
261 for other in cluster {
263 if other.id != node.id {
264 let distance =
265 ((node.x - other.x).powi(2) + (node.y - other.y).powi(2)).sqrt();
266 intra_distance += distance;
267 }
268 }
269 if cluster.len() > 1 {
270 intra_distance /= (cluster.len() - 1) as f64;
271 }
272
273 for other_cluster in clusters {
275 if other_cluster != cluster {
276 let mut min_inter_distance = f64::INFINITY;
277 for other in other_cluster {
278 let distance =
279 ((node.x - other.x).powi(2) + (node.y - other.y).powi(2)).sqrt();
280 min_inter_distance = min_inter_distance.min(distance);
281 }
282 inter_distances.push(min_inter_distance);
283 }
284 }
285
286 if !inter_distances.is_empty() {
287 let min_inter_distance =
288 inter_distances.iter().fold(f64::INFINITY, |a, &b| a.min(b));
289 let silhouette = (min_inter_distance - intra_distance)
290 / min_inter_distance.max(intra_distance);
291 total_score += silhouette;
292 total_nodes += 1;
293 }
294 }
295 }
296
297 if total_nodes > 0 {
298 total_score / total_nodes as f64
299 } else {
300 0.0
301 }
302 }
303
304 pub fn calculate_modularity(&self, clusters: &[Vec<GraphNode>], edges: &[GraphEdge]) -> f64 {
306 if edges.is_empty() {
307 return 0.0;
308 }
309
310 let total_edges = edges.len() as f64;
311 let mut modularity = 0.0;
312
313 for cluster in clusters {
314 let mut intra_edges = 0;
315 let mut total_degree = 0;
316
317 for node in cluster {
318 for edge in edges {
319 if edge.source == node.id || edge.target == node.id {
320 total_degree += 1;
321 if cluster.iter().any(|n| n.id == edge.source)
322 && cluster.iter().any(|n| n.id == edge.target)
323 {
324 intra_edges += 1;
325 }
326 }
327 }
328 }
329
330 let cluster_modularity = (intra_edges as f64 / total_edges)
331 - (total_degree as f64 / (2.0 * total_edges)).powi(2);
332 modularity += cluster_modularity;
333 }
334
335 modularity
336 }
337}