1use super::GraphEngine;
2use codemem_core::{Edge, GraphNode, NodeKind, RelationshipType};
3use petgraph::graph::NodeIndex;
4use petgraph::Direction;
5use std::collections::{HashMap, HashSet, VecDeque};
6
7impl GraphEngine {
8 pub fn pagerank_for_namespace(
16 &self,
17 namespace: &str,
18 damping: f64,
19 iterations: usize,
20 tolerance: f64,
21 ) -> HashMap<String, f64> {
22 let ns_indices: Vec<NodeIndex> = self
24 .graph
25 .node_indices()
26 .filter(|&idx| {
27 self.graph
28 .node_weight(idx)
29 .and_then(|id| self.nodes.get(id))
30 .and_then(|n| n.namespace.as_deref())
31 == Some(namespace)
32 })
33 .collect();
34
35 let n = ns_indices.len();
36 if n == 0 {
37 tracing::debug!(
38 namespace = %namespace,
39 "PageRank requested for namespace with no nodes"
40 );
41 return HashMap::new();
42 }
43
44 let ns_id_set: HashSet<NodeIndex> = ns_indices.iter().copied().collect();
45
46 let idx_pos: HashMap<NodeIndex, usize> = ns_indices
47 .iter()
48 .enumerate()
49 .map(|(i, &idx)| (idx, i))
50 .collect();
51
52 let nf = n as f64;
53 let initial = 1.0 / nf;
54 let mut scores = vec![initial; n];
55
56 let out_degree: Vec<usize> = ns_indices
61 .iter()
62 .map(|&idx| {
63 self.graph
64 .neighbors_directed(idx, Direction::Outgoing)
65 .filter(|nb| ns_id_set.contains(nb))
66 .count()
67 })
68 .collect();
69
70 for _ in 0..iterations {
71 let mut new_scores = vec![(1.0 - damping) / nf; n];
72
73 for (i, &idx) in ns_indices.iter().enumerate() {
74 let deg = out_degree[i];
75 if deg == 0 {
76 let share = damping * scores[i] / nf;
80 for ns in new_scores.iter_mut() {
81 *ns += share;
82 }
83 } else {
84 let share = damping * scores[i] / deg as f64;
85 for neighbor in self.graph.neighbors_directed(idx, Direction::Outgoing) {
86 if let Some(&pos) = idx_pos.get(&neighbor) {
87 new_scores[pos] += share;
88 }
89 }
90 }
91 }
92
93 let diff: f64 = scores
94 .iter()
95 .zip(new_scores.iter())
96 .map(|(a, b)| (a - b).abs())
97 .sum();
98
99 scores = new_scores;
100
101 if diff < tolerance {
102 break;
103 }
104 }
105
106 ns_indices
107 .iter()
108 .enumerate()
109 .filter_map(|(i, &idx)| {
110 self.graph
111 .node_weight(idx)
112 .map(|id| (id.clone(), scores[i]))
113 })
114 .collect()
115 }
116
117 pub fn pagerank(
125 &self,
126 damping: f64,
127 iterations: usize,
128 tolerance: f64,
129 ) -> HashMap<String, f64> {
130 let n = self.graph.node_count();
131 if n == 0 {
132 return HashMap::new();
133 }
134
135 let nf = n as f64;
136 let initial = 1.0 / nf;
137
138 let indices: Vec<NodeIndex> = self.graph.node_indices().collect();
140 let idx_pos: HashMap<NodeIndex, usize> = indices
141 .iter()
142 .enumerate()
143 .map(|(i, &idx)| (idx, i))
144 .collect();
145
146 let mut scores = vec![initial; n];
147
148 let out_degree: Vec<usize> = indices
150 .iter()
151 .map(|&idx| {
152 self.graph
153 .neighbors_directed(idx, Direction::Outgoing)
154 .count()
155 })
156 .collect();
157
158 for _ in 0..iterations {
159 let mut new_scores = vec![(1.0 - damping) / nf; n];
160
161 for (i, &idx) in indices.iter().enumerate() {
163 let deg = out_degree[i];
164 if deg == 0 {
165 let share = damping * scores[i] / nf;
167 for ns in new_scores.iter_mut() {
168 *ns += share;
169 }
170 } else {
171 let share = damping * scores[i] / deg as f64;
172 for neighbor in self.graph.neighbors_directed(idx, Direction::Outgoing) {
173 if let Some(&pos) = idx_pos.get(&neighbor) {
174 new_scores[pos] += share;
175 }
176 }
177 }
178 }
179
180 let diff: f64 = scores
182 .iter()
183 .zip(new_scores.iter())
184 .map(|(a, b)| (a - b).abs())
185 .sum();
186
187 scores = new_scores;
188
189 if diff < tolerance {
190 break;
191 }
192 }
193
194 indices
196 .iter()
197 .enumerate()
198 .filter_map(|(i, &idx)| {
199 self.graph
200 .node_weight(idx)
201 .map(|id| (id.clone(), scores[i]))
202 })
203 .collect()
204 }
205
206 #[cfg(test)]
213 pub fn personalized_pagerank(
214 &self,
215 seed_weights: &HashMap<String, f64>,
216 damping: f64,
217 iterations: usize,
218 tolerance: f64,
219 ) -> HashMap<String, f64> {
220 let n = self.graph.node_count();
221 if n == 0 {
222 return HashMap::new();
223 }
224
225 let nf = n as f64;
226
227 let indices: Vec<NodeIndex> = self.graph.node_indices().collect();
228 let idx_pos: HashMap<NodeIndex, usize> = indices
229 .iter()
230 .enumerate()
231 .map(|(i, &idx)| (idx, i))
232 .collect();
233
234 let mut teleport = vec![0.0f64; n];
236 let mut teleport_sum = 0.0;
237 for (i, &idx) in indices.iter().enumerate() {
238 if let Some(node_id) = self.graph.node_weight(idx) {
239 if let Some(&w) = seed_weights.get(node_id) {
240 teleport[i] = w;
241 teleport_sum += w;
242 }
243 }
244 }
245 if teleport_sum > 0.0 {
247 for t in teleport.iter_mut() {
248 *t /= teleport_sum;
249 }
250 } else {
251 for t in teleport.iter_mut() {
252 *t = 1.0 / nf;
253 }
254 }
255
256 let initial = 1.0 / nf;
257 let mut scores = vec![initial; n];
258
259 let out_degree: Vec<usize> = indices
260 .iter()
261 .map(|&idx| {
262 self.graph
263 .neighbors_directed(idx, Direction::Outgoing)
264 .count()
265 })
266 .collect();
267
268 for _ in 0..iterations {
269 let mut new_scores: Vec<f64> = teleport.iter().map(|&t| (1.0 - damping) * t).collect();
270
271 for (i, &idx) in indices.iter().enumerate() {
272 let deg = out_degree[i];
273 if deg == 0 {
274 let share = damping * scores[i];
276 for (j, t) in teleport.iter().enumerate() {
277 new_scores[j] += share * t;
278 }
279 } else {
280 let share = damping * scores[i] / deg as f64;
281 for neighbor in self.graph.neighbors_directed(idx, Direction::Outgoing) {
282 if let Some(&pos) = idx_pos.get(&neighbor) {
283 new_scores[pos] += share;
284 }
285 }
286 }
287 }
288
289 let diff: f64 = scores
290 .iter()
291 .zip(new_scores.iter())
292 .map(|(a, b)| (a - b).abs())
293 .sum();
294
295 scores = new_scores;
296
297 if diff < tolerance {
298 break;
299 }
300 }
301
302 indices
303 .iter()
304 .enumerate()
305 .filter_map(|(i, &idx)| {
306 self.graph
307 .node_weight(idx)
308 .map(|id| (id.clone(), scores[i]))
309 })
310 .collect()
311 }
312
313 pub fn louvain_communities(&self, resolution: f64) -> Vec<Vec<String>> {
319 let n = self.graph.node_count();
320 if n == 0 {
321 return Vec::new();
322 }
323
324 let indices: Vec<NodeIndex> = self.graph.node_indices().collect();
325 let idx_pos: HashMap<NodeIndex, usize> = indices
326 .iter()
327 .enumerate()
328 .map(|(i, &idx)| (idx, i))
329 .collect();
330
331 let edge_rel_by_idx: HashMap<petgraph::graph::EdgeIndex, RelationshipType> = {
336 let mut src_dst_rels: HashMap<(&str, &str), Vec<RelationshipType>> = HashMap::new();
340 for e in self.edges.values() {
341 src_dst_rels
342 .entry((e.src.as_str(), e.dst.as_str()))
343 .or_default()
344 .push(e.relationship);
345 }
346 let mut lookup = HashMap::new();
347 for edge_ref in self.graph.edge_indices() {
348 if let Some((src_idx, dst_idx)) = self.graph.edge_endpoints(edge_ref) {
349 if let (Some(src_id), Some(dst_id)) = (
350 self.graph.node_weight(src_idx),
351 self.graph.node_weight(dst_idx),
352 ) {
353 if let Some(rels) =
354 src_dst_rels.get_mut(&(src_id.as_str(), dst_id.as_str()))
355 {
356 if let Some(rel) = rels.pop() {
357 lookup.insert(edge_ref, rel);
358 }
359 }
360 }
361 }
362 }
363 lookup
364 };
365
366 let mut undirected_weights: HashMap<(usize, usize), f64> = HashMap::new();
372 for edge_ref in self.graph.edge_indices() {
373 if let Some((src_idx, dst_idx)) = self.graph.edge_endpoints(edge_ref) {
374 let w = self.graph[edge_ref];
375 if let (Some(&si), Some(&di)) = (idx_pos.get(&src_idx), idx_pos.get(&dst_idx)) {
376 let multiplier = edge_rel_by_idx
377 .get(&edge_ref)
378 .map(|rel| match rel {
379 RelationshipType::Extends
380 | RelationshipType::Implements
381 | RelationshipType::Inherits => 0.5,
382 _ => 1.0,
383 })
384 .unwrap_or(1.0);
385
386 let key = if si <= di { (si, di) } else { (di, si) };
387 *undirected_weights.entry(key).or_insert(0.0) += w * multiplier;
388 }
389 }
390 }
391
392 let mut adj: Vec<Vec<(usize, f64)>> = vec![Vec::new(); n];
393 let mut total_weight = 0.0;
394
395 for (&(si, di), &w) in &undirected_weights {
396 adj[si].push((di, w));
397 if si != di {
398 adj[di].push((si, w));
399 }
400 total_weight += w;
401 }
402
403 if total_weight == 0.0 {
404 return indices
406 .iter()
407 .filter_map(|&idx| self.graph.node_weight(idx).map(|id| vec![id.clone()]))
408 .collect();
409 }
410
411 let m = total_weight;
413 let m2 = 2.0 * m;
414
415 let k: Vec<f64> = (0..n)
417 .map(|i| adj[i].iter().map(|&(_, w)| w).sum())
418 .collect();
419
420 let mut community: Vec<usize> = (0..n).collect();
422
423 let mut sigma_tot: Vec<f64> = k.clone();
426
427 let mut improved = true;
429 let max_passes = 100;
430 let mut pass = 0;
431
432 while improved && pass < max_passes {
433 improved = false;
434 pass += 1;
435
436 for i in 0..n {
437 let current_comm = community[i];
438 let ki = k[i];
439
440 let mut comm_weights: HashMap<usize, f64> = HashMap::new();
442 for &(j, w) in &adj[i] {
443 *comm_weights.entry(community[j]).or_insert(0.0) += w;
444 }
445
446 let w_in_current = comm_weights.get(¤t_comm).copied().unwrap_or(0.0);
450 let sigma_current = sigma_tot[current_comm];
451 let remove_cost =
452 w_in_current / m - resolution * ki * (sigma_current - ki) / (m2 * m);
453
454 let mut best_comm = current_comm;
456 let mut best_gain = 0.0;
457
458 for (&comm, &w_in_comm) in &comm_weights {
459 if comm == current_comm {
460 continue;
461 }
462 let sigma_comm = sigma_tot[comm];
463 let gain =
464 w_in_comm / m - resolution * ki * sigma_comm / (m2 * m) - remove_cost;
465 if gain > best_gain {
466 best_gain = gain;
467 best_comm = comm;
468 }
469 }
470
471 if best_comm != current_comm {
472 sigma_tot[current_comm] -= ki;
474 sigma_tot[best_comm] += ki;
475 community[i] = best_comm;
476 improved = true;
477 }
478 }
479 }
480
481 let mut groups: HashMap<usize, Vec<String>> = HashMap::new();
483 for (i, &idx) in indices.iter().enumerate() {
484 if let Some(node_id) = self.graph.node_weight(idx) {
485 groups
486 .entry(community[i])
487 .or_default()
488 .push(node_id.clone());
489 }
490 }
491
492 let mut result: Vec<Vec<String>> = groups.into_values().collect();
493 for group in result.iter_mut() {
494 group.sort();
495 }
496 result.sort();
497 result
498 }
499
500 pub fn betweenness_centrality(&self) -> HashMap<String, f64> {
508 let n = self.graph.node_count();
509 if n <= 2 {
510 return self
511 .graph
512 .node_indices()
513 .filter_map(|idx| self.graph.node_weight(idx).map(|id| (id.clone(), 0.0)))
514 .collect();
515 }
516
517 let indices: Vec<NodeIndex> = self.graph.node_indices().collect();
518 let idx_pos: HashMap<NodeIndex, usize> = indices
519 .iter()
520 .enumerate()
521 .map(|(i, &idx)| (idx, i))
522 .collect();
523
524 let mut centrality = vec![0.0f64; n];
525
526 let sources: Vec<usize> = if n > 1000 {
528 let sample_size = (n as f64).sqrt() as usize;
529 let step = n / sample_size;
531 (0..sample_size).map(|i| i * step).collect()
532 } else {
533 (0..n).collect()
534 };
535
536 let scale = if n > 1000 {
537 n as f64 / sources.len() as f64
538 } else {
539 1.0
540 };
541
542 for &s in &sources {
543 let mut stack: Vec<usize> = Vec::new();
545 let mut predecessors: Vec<Vec<usize>> = vec![Vec::new(); n];
546 let mut sigma = vec![0.0f64; n]; sigma[s] = 1.0;
548 let mut dist: Vec<i64> = vec![-1; n];
549 dist[s] = 0;
550
551 let mut queue: VecDeque<usize> = VecDeque::new();
552 queue.push_back(s);
553
554 while let Some(v) = queue.pop_front() {
555 stack.push(v);
556 let v_idx = indices[v];
557 for neighbor in self.graph.neighbors_directed(v_idx, Direction::Outgoing) {
558 if let Some(&w) = idx_pos.get(&neighbor) {
559 if dist[w] < 0 {
560 dist[w] = dist[v] + 1;
561 queue.push_back(w);
562 }
563 if dist[w] == dist[v] + 1 {
564 sigma[w] += sigma[v];
565 predecessors[w].push(v);
566 }
567 }
568 }
569 }
570
571 let mut delta = vec![0.0f64; n];
572 while let Some(w) = stack.pop() {
573 for &v in &predecessors[w] {
574 delta[v] += (sigma[v] / sigma[w]) * (1.0 + delta[w]);
575 }
576 if w != s {
577 centrality[w] += delta[w];
578 }
579 }
580 }
581
582 let norm = ((n - 1) * (n - 2)) as f64;
584 indices
585 .iter()
586 .enumerate()
587 .filter_map(|(i, &idx)| {
588 self.graph
589 .node_weight(idx)
590 .map(|id| (id.clone(), centrality[i] * scale / norm))
591 })
592 .collect()
593 }
594
595 pub fn strongly_connected_components(&self) -> Vec<Vec<String>> {
600 let sccs = petgraph::algo::tarjan_scc(&self.graph);
601
602 let mut result: Vec<Vec<String>> = sccs
603 .into_iter()
604 .map(|component| {
605 let mut ids: Vec<String> = component
606 .into_iter()
607 .filter_map(|idx| self.graph.node_weight(idx).cloned())
608 .collect();
609 ids.sort();
610 ids
611 })
612 .collect();
613
614 result.sort();
615 result
616 }
617
618 pub fn topological_layers(&self) -> Vec<Vec<String>> {
626 let n = self.graph.node_count();
627 if n == 0 {
628 return Vec::new();
629 }
630
631 let indices: Vec<NodeIndex> = self.graph.node_indices().collect();
632 let idx_pos: HashMap<NodeIndex, usize> = indices
633 .iter()
634 .enumerate()
635 .map(|(i, &idx)| (idx, i))
636 .collect();
637
638 let sccs = petgraph::algo::tarjan_scc(&self.graph);
640
641 let mut node_to_scc = vec![0usize; n];
643 for (scc_idx, scc) in sccs.iter().enumerate() {
644 for &node_idx in scc {
645 if let Some(&pos) = idx_pos.get(&node_idx) {
646 node_to_scc[pos] = scc_idx;
647 }
648 }
649 }
650
651 let num_sccs = sccs.len();
652
653 let mut condensed_adj: Vec<HashSet<usize>> = vec![HashSet::new(); num_sccs];
655 let mut condensed_in_degree = vec![0usize; num_sccs];
656
657 for &idx in &indices {
658 if let Some(&src_pos) = idx_pos.get(&idx) {
659 let src_scc = node_to_scc[src_pos];
660 for neighbor in self.graph.neighbors_directed(idx, Direction::Outgoing) {
661 if let Some(&dst_pos) = idx_pos.get(&neighbor) {
662 let dst_scc = node_to_scc[dst_pos];
663 if src_scc != dst_scc && condensed_adj[src_scc].insert(dst_scc) {
664 condensed_in_degree[dst_scc] += 1;
665 }
666 }
667 }
668 }
669 }
670
671 let mut queue: VecDeque<usize> = VecDeque::new();
673 for (i, °) in condensed_in_degree.iter().enumerate().take(num_sccs) {
674 if deg == 0 {
675 queue.push_back(i);
676 }
677 }
678
679 let mut scc_layers: Vec<Vec<usize>> = Vec::new();
680 while !queue.is_empty() {
681 let mut layer = Vec::new();
682 let mut next_queue = VecDeque::new();
683
684 while let Some(scc_idx) = queue.pop_front() {
685 layer.push(scc_idx);
686 for &neighbor_scc in &condensed_adj[scc_idx] {
687 condensed_in_degree[neighbor_scc] -= 1;
688 if condensed_in_degree[neighbor_scc] == 0 {
689 next_queue.push_back(neighbor_scc);
690 }
691 }
692 }
693
694 scc_layers.push(layer);
695 queue = next_queue;
696 }
697
698 let mut result: Vec<Vec<String>> = Vec::new();
700 for scc_layer in scc_layers {
701 let mut layer_nodes: Vec<String> = Vec::new();
702 for scc_idx in scc_layer {
703 for &node_idx in &sccs[scc_idx] {
704 if let Some(id) = self.graph.node_weight(node_idx) {
705 layer_nodes.push(id.clone());
706 }
707 }
708 }
709 layer_nodes.sort();
710 result.push(layer_nodes);
711 }
712
713 result
714 }
715
716 pub fn subgraph_top_n(
723 &self,
724 n: usize,
725 namespace: Option<&str>,
726 kinds: Option<&[NodeKind]>,
727 ) -> (Vec<GraphNode>, Vec<Edge>) {
728 let ns_filter = |node: &&GraphNode| -> bool {
729 if let Some(ns) = namespace {
730 node.namespace.as_deref() == Some(ns)
731 } else {
732 true
733 }
734 };
735 let kind_filter = |node: &&GraphNode| -> bool {
736 if let Some(k) = kinds {
737 k.contains(&node.kind)
738 } else {
739 true
740 }
741 };
742
743 let mut candidates: Vec<&GraphNode> = self
744 .nodes
745 .values()
746 .filter(ns_filter)
747 .filter(kind_filter)
748 .collect();
749
750 candidates.sort_by(|a, b| {
752 b.centrality
753 .partial_cmp(&a.centrality)
754 .unwrap_or(std::cmp::Ordering::Equal)
755 });
756
757 candidates.truncate(n);
759
760 let mut top_ids: HashSet<&str> = candidates.iter().map(|node| node.id.as_str()).collect();
761
762 let budget = (n / 5).max(1);
766 let mut extra_ids: Vec<&str> = Vec::new();
767 for edge in self.edges.values() {
768 if extra_ids.len() >= budget {
769 break;
770 }
771 if edge.relationship == RelationshipType::Contains
772 || edge.relationship == RelationshipType::PartOf
773 {
774 continue;
775 }
776 let (in_top, other) = if top_ids.contains(edge.src.as_str()) {
777 (true, edge.dst.as_str())
778 } else if top_ids.contains(edge.dst.as_str()) {
779 (true, edge.src.as_str())
780 } else {
781 (false, "")
782 };
783 if in_top && !top_ids.contains(other) {
784 if let Some(node) = self.nodes.get(other) {
785 if ns_filter(&node) && kind_filter(&node) {
786 extra_ids.push(other);
787 top_ids.insert(other);
788 }
789 }
790 }
791 }
792
793 let mut nodes_vec: Vec<GraphNode> = candidates.into_iter().cloned().collect();
794 for id in &extra_ids {
795 if let Some(node) = self.nodes.get(*id) {
796 nodes_vec.push(node.clone());
797 }
798 }
799
800 let edges_vec: Vec<Edge> = self
802 .edges
803 .values()
804 .filter(|edge| {
805 top_ids.contains(edge.src.as_str()) && top_ids.contains(edge.dst.as_str())
806 })
807 .cloned()
808 .collect();
809
810 (nodes_vec, edges_vec)
811 }
812
813 pub fn label_community(&self, member_ids: &[&str]) -> String {
821 let mut dir_counts: HashMap<&str, usize> = HashMap::new();
822
823 for &node_id in member_ids {
824 if let Some(node) = self.nodes.get(node_id) {
825 if let Some(file_path) = node.payload.get("file_path").and_then(|v| v.as_str()) {
826 if let Some(dir) = file_path.rsplit('/').nth(1) {
828 *dir_counts.entry(dir).or_insert(0) += 1;
829 }
830 }
831 }
832 }
833
834 if dir_counts.is_empty() {
835 return "unknown".to_string();
836 }
837
838 let mut sorted: Vec<_> = dir_counts.into_iter().collect();
839 sorted.sort_by(|a, b| b.1.cmp(&a.1).then(a.0.cmp(b.0)));
840
841 if sorted.len() == 1 {
842 sorted[0].0.to_string()
843 } else if sorted.len() <= 2 {
844 format!("{}+{}", sorted[0].0, sorted[1].0)
845 } else {
846 let extra = sorted.len() - 2;
847 format!("{}+{} +{extra} more", sorted[0].0, sorted[1].0)
848 }
849 }
850
851 pub fn louvain_with_assignment(&self, resolution: f64) -> HashMap<String, usize> {
853 let communities = self.louvain_communities(resolution);
854 let mut assignment = HashMap::new();
855 for (idx, community) in communities.into_iter().enumerate() {
856 for node_id in community {
857 assignment.insert(node_id, idx);
858 }
859 }
860 assignment
861 }
862}
863
864#[cfg(test)]
865#[path = "../tests/graph_algorithms_tests.rs"]
866mod tests;