codemem_graph/
algorithms.rs1use crate::GraphEngine;
2use codemem_core::{Edge, GraphNode, NodeKind};
3use petgraph::graph::NodeIndex;
4use petgraph::Direction;
5use std::collections::{HashMap, HashSet, VecDeque};
6
7impl GraphEngine {
8 pub fn pagerank(
16 &self,
17 damping: f64,
18 iterations: usize,
19 tolerance: f64,
20 ) -> HashMap<String, f64> {
21 let n = self.graph.node_count();
22 if n == 0 {
23 return HashMap::new();
24 }
25
26 let nf = n as f64;
27 let initial = 1.0 / nf;
28
29 let indices: Vec<NodeIndex> = self.graph.node_indices().collect();
31 let idx_pos: HashMap<NodeIndex, usize> = indices
32 .iter()
33 .enumerate()
34 .map(|(i, &idx)| (idx, i))
35 .collect();
36
37 let mut scores = vec![initial; n];
38
39 let out_degree: Vec<usize> = indices
41 .iter()
42 .map(|&idx| {
43 self.graph
44 .neighbors_directed(idx, Direction::Outgoing)
45 .count()
46 })
47 .collect();
48
49 for _ in 0..iterations {
50 let mut new_scores = vec![(1.0 - damping) / nf; n];
51
52 for (i, &idx) in indices.iter().enumerate() {
54 let deg = out_degree[i];
55 if deg == 0 {
56 let share = damping * scores[i] / nf;
58 for ns in new_scores.iter_mut() {
59 *ns += share;
60 }
61 } else {
62 let share = damping * scores[i] / deg as f64;
63 for neighbor in self.graph.neighbors_directed(idx, Direction::Outgoing) {
64 if let Some(&pos) = idx_pos.get(&neighbor) {
65 new_scores[pos] += share;
66 }
67 }
68 }
69 }
70
71 let diff: f64 = scores
73 .iter()
74 .zip(new_scores.iter())
75 .map(|(a, b)| (a - b).abs())
76 .sum();
77
78 scores = new_scores;
79
80 if diff < tolerance {
81 break;
82 }
83 }
84
85 indices
87 .iter()
88 .enumerate()
89 .filter_map(|(i, &idx)| {
90 self.graph
91 .node_weight(idx)
92 .map(|id| (id.clone(), scores[i]))
93 })
94 .collect()
95 }
96
97 pub fn personalized_pagerank(
104 &self,
105 seed_weights: &HashMap<String, f64>,
106 damping: f64,
107 iterations: usize,
108 tolerance: f64,
109 ) -> HashMap<String, f64> {
110 let n = self.graph.node_count();
111 if n == 0 {
112 return HashMap::new();
113 }
114
115 let nf = n as f64;
116
117 let indices: Vec<NodeIndex> = self.graph.node_indices().collect();
118 let idx_pos: HashMap<NodeIndex, usize> = indices
119 .iter()
120 .enumerate()
121 .map(|(i, &idx)| (idx, i))
122 .collect();
123
124 let mut teleport = vec![0.0f64; n];
126 let mut teleport_sum = 0.0;
127 for (i, &idx) in indices.iter().enumerate() {
128 if let Some(node_id) = self.graph.node_weight(idx) {
129 if let Some(&w) = seed_weights.get(node_id) {
130 teleport[i] = w;
131 teleport_sum += w;
132 }
133 }
134 }
135 if teleport_sum > 0.0 {
137 for t in teleport.iter_mut() {
138 *t /= teleport_sum;
139 }
140 } else {
141 for t in teleport.iter_mut() {
142 *t = 1.0 / nf;
143 }
144 }
145
146 let initial = 1.0 / nf;
147 let mut scores = vec![initial; n];
148
149 let out_degree: Vec<usize> = indices
150 .iter()
151 .map(|&idx| {
152 self.graph
153 .neighbors_directed(idx, Direction::Outgoing)
154 .count()
155 })
156 .collect();
157
158 for _ in 0..iterations {
159 let mut new_scores: Vec<f64> = teleport.iter().map(|&t| (1.0 - damping) * t).collect();
160
161 for (i, &idx) in indices.iter().enumerate() {
162 let deg = out_degree[i];
163 if deg == 0 {
164 let share = damping * scores[i];
166 for (j, t) in teleport.iter().enumerate() {
167 new_scores[j] += share * t;
168 }
169 } else {
170 let share = damping * scores[i] / deg as f64;
171 for neighbor in self.graph.neighbors_directed(idx, Direction::Outgoing) {
172 if let Some(&pos) = idx_pos.get(&neighbor) {
173 new_scores[pos] += share;
174 }
175 }
176 }
177 }
178
179 let diff: f64 = scores
180 .iter()
181 .zip(new_scores.iter())
182 .map(|(a, b)| (a - b).abs())
183 .sum();
184
185 scores = new_scores;
186
187 if diff < tolerance {
188 break;
189 }
190 }
191
192 indices
193 .iter()
194 .enumerate()
195 .filter_map(|(i, &idx)| {
196 self.graph
197 .node_weight(idx)
198 .map(|id| (id.clone(), scores[i]))
199 })
200 .collect()
201 }
202
203 pub fn louvain_communities(&self, resolution: f64) -> Vec<Vec<String>> {
209 let n = self.graph.node_count();
210 if n == 0 {
211 return Vec::new();
212 }
213
214 let indices: Vec<NodeIndex> = self.graph.node_indices().collect();
215 let idx_pos: HashMap<NodeIndex, usize> = indices
216 .iter()
217 .enumerate()
218 .map(|(i, &idx)| (idx, i))
219 .collect();
220
221 let mut adj: Vec<Vec<(usize, f64)>> = vec![Vec::new(); n];
224 let mut total_weight = 0.0;
225
226 for edge_ref in self.graph.edge_indices() {
227 if let Some((src_idx, dst_idx)) = self.graph.edge_endpoints(edge_ref) {
228 let w = self.graph[edge_ref];
229 if let (Some(&si), Some(&di)) = (idx_pos.get(&src_idx), idx_pos.get(&dst_idx)) {
230 adj[si].push((di, w));
231 adj[di].push((si, w));
232 total_weight += w; }
234 }
235 }
236
237 if total_weight == 0.0 {
238 return indices
240 .iter()
241 .filter_map(|&idx| self.graph.node_weight(idx).map(|id| vec![id.clone()]))
242 .collect();
243 }
244
245 let m = total_weight;
247 let m2 = 2.0 * m;
248
249 let k: Vec<f64> = (0..n)
251 .map(|i| adj[i].iter().map(|&(_, w)| w).sum())
252 .collect();
253
254 let mut community: Vec<usize> = (0..n).collect();
256
257 let mut improved = true;
259 let max_passes = 100;
260 let mut pass = 0;
261
262 while improved && pass < max_passes {
263 improved = false;
264 pass += 1;
265
266 for i in 0..n {
267 let current_comm = community[i];
268
269 let mut comm_weights: HashMap<usize, f64> = HashMap::new();
271 for &(j, w) in &adj[i] {
272 *comm_weights.entry(community[j]).or_insert(0.0) += w;
273 }
274
275 let mut comm_degree_sum: HashMap<usize, f64> = HashMap::new();
277 for j in 0..n {
278 *comm_degree_sum.entry(community[j]).or_insert(0.0) += k[j];
279 }
280
281 let ki = k[i];
282
283 let w_in_current = comm_weights.get(¤t_comm).copied().unwrap_or(0.0);
285 let sigma_current = comm_degree_sum.get(¤t_comm).copied().unwrap_or(0.0);
286 let remove_cost = w_in_current - resolution * ki * (sigma_current - ki) / m2;
287
288 let mut best_comm = current_comm;
290 let mut best_gain = 0.0;
291
292 for (&comm, &w_in_comm) in &comm_weights {
293 if comm == current_comm {
294 continue;
295 }
296 let sigma_comm = comm_degree_sum.get(&comm).copied().unwrap_or(0.0);
297 let gain = w_in_comm - resolution * ki * sigma_comm / m2 - remove_cost;
298 if gain > best_gain {
299 best_gain = gain;
300 best_comm = comm;
301 }
302 }
303
304 if best_comm != current_comm {
305 community[i] = best_comm;
306 improved = true;
307 }
308 }
309 }
310
311 let mut groups: HashMap<usize, Vec<String>> = HashMap::new();
313 for (i, &idx) in indices.iter().enumerate() {
314 if let Some(node_id) = self.graph.node_weight(idx) {
315 groups
316 .entry(community[i])
317 .or_default()
318 .push(node_id.clone());
319 }
320 }
321
322 let mut result: Vec<Vec<String>> = groups.into_values().collect();
323 for group in result.iter_mut() {
324 group.sort();
325 }
326 result.sort();
327 result
328 }
329
330 pub fn betweenness_centrality(&self) -> HashMap<String, f64> {
338 let n = self.graph.node_count();
339 if n <= 2 {
340 return self
341 .graph
342 .node_indices()
343 .filter_map(|idx| self.graph.node_weight(idx).map(|id| (id.clone(), 0.0)))
344 .collect();
345 }
346
347 let indices: Vec<NodeIndex> = self.graph.node_indices().collect();
348 let idx_pos: HashMap<NodeIndex, usize> = indices
349 .iter()
350 .enumerate()
351 .map(|(i, &idx)| (idx, i))
352 .collect();
353
354 let mut centrality = vec![0.0f64; n];
355
356 let sources: Vec<usize> = if n > 1000 {
358 let sample_size = (n as f64).sqrt() as usize;
359 let step = n / sample_size;
361 (0..sample_size).map(|i| i * step).collect()
362 } else {
363 (0..n).collect()
364 };
365
366 let scale = if n > 1000 {
367 n as f64 / sources.len() as f64
368 } else {
369 1.0
370 };
371
372 for &s in &sources {
373 let mut stack: Vec<usize> = Vec::new();
375 let mut predecessors: Vec<Vec<usize>> = vec![Vec::new(); n];
376 let mut sigma = vec![0.0f64; n]; sigma[s] = 1.0;
378 let mut dist: Vec<i64> = vec![-1; n];
379 dist[s] = 0;
380
381 let mut queue: VecDeque<usize> = VecDeque::new();
382 queue.push_back(s);
383
384 while let Some(v) = queue.pop_front() {
385 stack.push(v);
386 let v_idx = indices[v];
387 for neighbor in self.graph.neighbors_directed(v_idx, Direction::Outgoing) {
388 if let Some(&w) = idx_pos.get(&neighbor) {
389 if dist[w] < 0 {
390 dist[w] = dist[v] + 1;
391 queue.push_back(w);
392 }
393 if dist[w] == dist[v] + 1 {
394 sigma[w] += sigma[v];
395 predecessors[w].push(v);
396 }
397 }
398 }
399 }
400
401 let mut delta = vec![0.0f64; n];
402 while let Some(w) = stack.pop() {
403 for &v in &predecessors[w] {
404 delta[v] += (sigma[v] / sigma[w]) * (1.0 + delta[w]);
405 }
406 if w != s {
407 centrality[w] += delta[w];
408 }
409 }
410 }
411
412 let norm = ((n - 1) * (n - 2)) as f64;
414 indices
415 .iter()
416 .enumerate()
417 .filter_map(|(i, &idx)| {
418 self.graph
419 .node_weight(idx)
420 .map(|id| (id.clone(), centrality[i] * scale / norm))
421 })
422 .collect()
423 }
424
425 pub fn strongly_connected_components(&self) -> Vec<Vec<String>> {
430 let sccs = petgraph::algo::tarjan_scc(&self.graph);
431
432 let mut result: Vec<Vec<String>> = sccs
433 .into_iter()
434 .map(|component| {
435 let mut ids: Vec<String> = component
436 .into_iter()
437 .filter_map(|idx| self.graph.node_weight(idx).cloned())
438 .collect();
439 ids.sort();
440 ids
441 })
442 .collect();
443
444 result.sort();
445 result
446 }
447
448 pub fn topological_layers(&self) -> Vec<Vec<String>> {
456 let n = self.graph.node_count();
457 if n == 0 {
458 return Vec::new();
459 }
460
461 let indices: Vec<NodeIndex> = self.graph.node_indices().collect();
462 let idx_pos: HashMap<NodeIndex, usize> = indices
463 .iter()
464 .enumerate()
465 .map(|(i, &idx)| (idx, i))
466 .collect();
467
468 let sccs = petgraph::algo::tarjan_scc(&self.graph);
470
471 let mut node_to_scc = vec![0usize; n];
473 for (scc_idx, scc) in sccs.iter().enumerate() {
474 for &node_idx in scc {
475 if let Some(&pos) = idx_pos.get(&node_idx) {
476 node_to_scc[pos] = scc_idx;
477 }
478 }
479 }
480
481 let num_sccs = sccs.len();
482
483 let mut condensed_adj: Vec<HashSet<usize>> = vec![HashSet::new(); num_sccs];
485 let mut condensed_in_degree = vec![0usize; num_sccs];
486
487 for &idx in &indices {
488 if let Some(&src_pos) = idx_pos.get(&idx) {
489 let src_scc = node_to_scc[src_pos];
490 for neighbor in self.graph.neighbors_directed(idx, Direction::Outgoing) {
491 if let Some(&dst_pos) = idx_pos.get(&neighbor) {
492 let dst_scc = node_to_scc[dst_pos];
493 if src_scc != dst_scc && condensed_adj[src_scc].insert(dst_scc) {
494 condensed_in_degree[dst_scc] += 1;
495 }
496 }
497 }
498 }
499 }
500
501 let mut queue: VecDeque<usize> = VecDeque::new();
503 for (i, °) in condensed_in_degree.iter().enumerate().take(num_sccs) {
504 if deg == 0 {
505 queue.push_back(i);
506 }
507 }
508
509 let mut scc_layers: Vec<Vec<usize>> = Vec::new();
510 while !queue.is_empty() {
511 let mut layer = Vec::new();
512 let mut next_queue = VecDeque::new();
513
514 while let Some(scc_idx) = queue.pop_front() {
515 layer.push(scc_idx);
516 for &neighbor_scc in &condensed_adj[scc_idx] {
517 condensed_in_degree[neighbor_scc] -= 1;
518 if condensed_in_degree[neighbor_scc] == 0 {
519 next_queue.push_back(neighbor_scc);
520 }
521 }
522 }
523
524 scc_layers.push(layer);
525 queue = next_queue;
526 }
527
528 let mut result: Vec<Vec<String>> = Vec::new();
530 for scc_layer in scc_layers {
531 let mut layer_nodes: Vec<String> = Vec::new();
532 for scc_idx in scc_layer {
533 for &node_idx in &sccs[scc_idx] {
534 if let Some(id) = self.graph.node_weight(node_idx) {
535 layer_nodes.push(id.clone());
536 }
537 }
538 }
539 layer_nodes.sort();
540 result.push(layer_nodes);
541 }
542
543 result
544 }
545
546 pub fn subgraph_top_n(
549 &self,
550 n: usize,
551 namespace: Option<&str>,
552 kinds: Option<&[NodeKind]>,
553 ) -> (Vec<GraphNode>, Vec<Edge>) {
554 let mut candidates: Vec<&GraphNode> = self
555 .nodes
556 .values()
557 .filter(|node| {
558 if let Some(ns) = namespace {
559 match &node.namespace {
560 Some(node_ns) => node_ns == ns,
561 None => false,
562 }
563 } else {
564 true
565 }
566 })
567 .filter(|node| {
568 if let Some(k) = kinds {
569 k.contains(&node.kind)
570 } else {
571 true
572 }
573 })
574 .collect();
575
576 candidates.sort_by(|a, b| {
578 b.centrality
579 .partial_cmp(&a.centrality)
580 .unwrap_or(std::cmp::Ordering::Equal)
581 });
582
583 candidates.truncate(n);
585
586 let top_ids: HashSet<&str> = candidates.iter().map(|node| node.id.as_str()).collect();
587 let nodes_vec: Vec<GraphNode> = candidates.into_iter().cloned().collect();
588
589 let edges_vec: Vec<Edge> = self
591 .edges
592 .values()
593 .filter(|edge| {
594 top_ids.contains(edge.src.as_str()) && top_ids.contains(edge.dst.as_str())
595 })
596 .cloned()
597 .collect();
598
599 (nodes_vec, edges_vec)
600 }
601
602 pub fn louvain_with_assignment(&self, resolution: f64) -> HashMap<String, usize> {
604 let communities = self.louvain_communities(resolution);
605 let mut assignment = HashMap::new();
606 for (idx, community) in communities.into_iter().enumerate() {
607 for node_id in community {
608 assignment.insert(node_id, idx);
609 }
610 }
611 assignment
612 }
613}
614
615#[cfg(test)]
616#[path = "tests/algorithms_tests.rs"]
617mod tests;