Skip to main content

codemem_graph/
algorithms.rs

1use crate::GraphEngine;
2use codemem_core::{Edge, GraphNode, NodeKind};
3use petgraph::graph::NodeIndex;
4use petgraph::Direction;
5use std::collections::{HashMap, HashSet, VecDeque};
6
7impl GraphEngine {
8    /// Compute PageRank scores for all nodes using power iteration.
9    ///
10    /// - `damping`: probability of following an edge (default 0.85)
11    /// - `iterations`: max number of power iterations (default 100)
12    /// - `tolerance`: convergence threshold (default 1e-6)
13    ///
14    /// Returns a map from node ID to PageRank score.
15    pub fn pagerank(
16        &self,
17        damping: f64,
18        iterations: usize,
19        tolerance: f64,
20    ) -> HashMap<String, f64> {
21        let n = self.graph.node_count();
22        if n == 0 {
23            return HashMap::new();
24        }
25
26        let nf = n as f64;
27        let initial = 1.0 / nf;
28
29        // Collect all node indices in a stable order
30        let indices: Vec<NodeIndex> = self.graph.node_indices().collect();
31        let idx_pos: HashMap<NodeIndex, usize> = indices
32            .iter()
33            .enumerate()
34            .map(|(i, &idx)| (idx, i))
35            .collect();
36
37        let mut scores = vec![initial; n];
38
39        // Precompute out-degrees
40        let out_degree: Vec<usize> = indices
41            .iter()
42            .map(|&idx| {
43                self.graph
44                    .neighbors_directed(idx, Direction::Outgoing)
45                    .count()
46            })
47            .collect();
48
49        for _ in 0..iterations {
50            let mut new_scores = vec![(1.0 - damping) / nf; n];
51
52            // Distribute rank from each node to its out-neighbors
53            for (i, &idx) in indices.iter().enumerate() {
54                let deg = out_degree[i];
55                if deg == 0 {
56                    // Dangling node: distribute evenly to all nodes
57                    let share = damping * scores[i] / nf;
58                    for ns in new_scores.iter_mut() {
59                        *ns += share;
60                    }
61                } else {
62                    let share = damping * scores[i] / deg as f64;
63                    for neighbor in self.graph.neighbors_directed(idx, Direction::Outgoing) {
64                        if let Some(&pos) = idx_pos.get(&neighbor) {
65                            new_scores[pos] += share;
66                        }
67                    }
68                }
69            }
70
71            // Check convergence
72            let diff: f64 = scores
73                .iter()
74                .zip(new_scores.iter())
75                .map(|(a, b)| (a - b).abs())
76                .sum();
77
78            scores = new_scores;
79
80            if diff < tolerance {
81                break;
82            }
83        }
84
85        // Map back to node IDs
86        indices
87            .iter()
88            .enumerate()
89            .filter_map(|(i, &idx)| {
90                self.graph
91                    .node_weight(idx)
92                    .map(|id| (id.clone(), scores[i]))
93            })
94            .collect()
95    }
96
97    /// Compute Personalized PageRank with custom teleport weights.
98    ///
99    /// `seed_weights` maps node IDs to teleport probabilities (will be normalized).
100    /// Nodes not in seed_weights get zero teleport probability.
101    ///
102    /// Used for blast-radius analysis and HippoRAG-2-style retrieval.
103    pub fn personalized_pagerank(
104        &self,
105        seed_weights: &HashMap<String, f64>,
106        damping: f64,
107        iterations: usize,
108        tolerance: f64,
109    ) -> HashMap<String, f64> {
110        let n = self.graph.node_count();
111        if n == 0 {
112            return HashMap::new();
113        }
114
115        let nf = n as f64;
116
117        let indices: Vec<NodeIndex> = self.graph.node_indices().collect();
118        let idx_pos: HashMap<NodeIndex, usize> = indices
119            .iter()
120            .enumerate()
121            .map(|(i, &idx)| (idx, i))
122            .collect();
123
124        // Build and normalize the teleport vector
125        let mut teleport = vec![0.0f64; n];
126        let mut teleport_sum = 0.0;
127        for (i, &idx) in indices.iter().enumerate() {
128            if let Some(node_id) = self.graph.node_weight(idx) {
129                if let Some(&w) = seed_weights.get(node_id) {
130                    teleport[i] = w;
131                    teleport_sum += w;
132                }
133            }
134        }
135        // Normalize; if no seeds provided, fall back to uniform
136        if teleport_sum > 0.0 {
137            for t in teleport.iter_mut() {
138                *t /= teleport_sum;
139            }
140        } else {
141            for t in teleport.iter_mut() {
142                *t = 1.0 / nf;
143            }
144        }
145
146        let initial = 1.0 / nf;
147        let mut scores = vec![initial; n];
148
149        let out_degree: Vec<usize> = indices
150            .iter()
151            .map(|&idx| {
152                self.graph
153                    .neighbors_directed(idx, Direction::Outgoing)
154                    .count()
155            })
156            .collect();
157
158        for _ in 0..iterations {
159            let mut new_scores: Vec<f64> = teleport.iter().map(|&t| (1.0 - damping) * t).collect();
160
161            for (i, &idx) in indices.iter().enumerate() {
162                let deg = out_degree[i];
163                if deg == 0 {
164                    // Dangling node: distribute to teleport targets
165                    let share = damping * scores[i];
166                    for (j, t) in teleport.iter().enumerate() {
167                        new_scores[j] += share * t;
168                    }
169                } else {
170                    let share = damping * scores[i] / deg as f64;
171                    for neighbor in self.graph.neighbors_directed(idx, Direction::Outgoing) {
172                        if let Some(&pos) = idx_pos.get(&neighbor) {
173                            new_scores[pos] += share;
174                        }
175                    }
176                }
177            }
178
179            let diff: f64 = scores
180                .iter()
181                .zip(new_scores.iter())
182                .map(|(a, b)| (a - b).abs())
183                .sum();
184
185            scores = new_scores;
186
187            if diff < tolerance {
188                break;
189            }
190        }
191
192        indices
193            .iter()
194            .enumerate()
195            .filter_map(|(i, &idx)| {
196                self.graph
197                    .node_weight(idx)
198                    .map(|id| (id.clone(), scores[i]))
199            })
200            .collect()
201    }
202
203    /// Detect communities using the Louvain algorithm.
204    ///
205    /// Treats the directed graph as undirected for modularity computation.
206    /// `resolution` controls community granularity (1.0 = standard modularity).
207    /// Returns groups of node IDs, one group per community.
208    pub fn louvain_communities(&self, resolution: f64) -> Vec<Vec<String>> {
209        let n = self.graph.node_count();
210        if n == 0 {
211            return Vec::new();
212        }
213
214        let indices: Vec<NodeIndex> = self.graph.node_indices().collect();
215        let idx_pos: HashMap<NodeIndex, usize> = indices
216            .iter()
217            .enumerate()
218            .map(|(i, &idx)| (idx, i))
219            .collect();
220
221        // Build undirected adjacency with weights.
222        // adj[i] contains (j, weight) for each undirected neighbor.
223        let mut adj: Vec<Vec<(usize, f64)>> = vec![Vec::new(); n];
224        let mut total_weight = 0.0;
225
226        for edge_ref in self.graph.edge_indices() {
227            if let Some((src_idx, dst_idx)) = self.graph.edge_endpoints(edge_ref) {
228                let w = self.graph[edge_ref];
229                if let (Some(&si), Some(&di)) = (idx_pos.get(&src_idx), idx_pos.get(&dst_idx)) {
230                    adj[si].push((di, w));
231                    adj[di].push((si, w));
232                    total_weight += w; // Each undirected edge contributes w (counted once)
233                }
234            }
235        }
236
237        if total_weight == 0.0 {
238            // No edges: each node is its own community
239            return indices
240                .iter()
241                .filter_map(|&idx| self.graph.node_weight(idx).map(|id| vec![id.clone()]))
242                .collect();
243        }
244
245        // m = total edge weight (for undirected: sum of all edge weights)
246        let m = total_weight;
247        let m2 = 2.0 * m;
248
249        // Weighted degree of each node (sum of incident edge weights, undirected)
250        let k: Vec<f64> = (0..n)
251            .map(|i| adj[i].iter().map(|&(_, w)| w).sum())
252            .collect();
253
254        // Initial assignment: each node in its own community
255        let mut community: Vec<usize> = (0..n).collect();
256
257        // Iteratively move nodes to improve modularity
258        let mut improved = true;
259        let max_passes = 100;
260        let mut pass = 0;
261
262        while improved && pass < max_passes {
263            improved = false;
264            pass += 1;
265
266            for i in 0..n {
267                let current_comm = community[i];
268
269                // Compute weights to each neighboring community
270                let mut comm_weights: HashMap<usize, f64> = HashMap::new();
271                for &(j, w) in &adj[i] {
272                    *comm_weights.entry(community[j]).or_insert(0.0) += w;
273                }
274
275                // Sum of degrees in each community (excluding node i for its own community)
276                let mut comm_degree_sum: HashMap<usize, f64> = HashMap::new();
277                for j in 0..n {
278                    *comm_degree_sum.entry(community[j]).or_insert(0.0) += k[j];
279                }
280
281                let ki = k[i];
282
283                // Modularity gain for removing i from its current community
284                let w_in_current = comm_weights.get(&current_comm).copied().unwrap_or(0.0);
285                let sigma_current = comm_degree_sum.get(&current_comm).copied().unwrap_or(0.0);
286                let remove_cost = w_in_current - resolution * ki * (sigma_current - ki) / m2;
287
288                // Find best community to move to
289                let mut best_comm = current_comm;
290                let mut best_gain = 0.0;
291
292                for (&comm, &w_in_comm) in &comm_weights {
293                    if comm == current_comm {
294                        continue;
295                    }
296                    let sigma_comm = comm_degree_sum.get(&comm).copied().unwrap_or(0.0);
297                    let gain = w_in_comm - resolution * ki * sigma_comm / m2 - remove_cost;
298                    if gain > best_gain {
299                        best_gain = gain;
300                        best_comm = comm;
301                    }
302                }
303
304                if best_comm != current_comm {
305                    community[i] = best_comm;
306                    improved = true;
307                }
308            }
309        }
310
311        // Group nodes by community
312        let mut groups: HashMap<usize, Vec<String>> = HashMap::new();
313        for (i, &idx) in indices.iter().enumerate() {
314            if let Some(node_id) = self.graph.node_weight(idx) {
315                groups
316                    .entry(community[i])
317                    .or_default()
318                    .push(node_id.clone());
319            }
320        }
321
322        let mut result: Vec<Vec<String>> = groups.into_values().collect();
323        for group in result.iter_mut() {
324            group.sort();
325        }
326        result.sort();
327        result
328    }
329
330    /// Compute betweenness centrality for all nodes using Brandes' algorithm.
331    ///
332    /// For graphs with more than 1000 nodes, samples sqrt(n) source nodes
333    /// for approximate computation.
334    ///
335    /// Returns a map from node ID to betweenness centrality score (normalized by
336    /// 1/((n-1)(n-2)) for directed graphs).
337    pub fn betweenness_centrality(&self) -> HashMap<String, f64> {
338        let n = self.graph.node_count();
339        if n <= 2 {
340            return self
341                .graph
342                .node_indices()
343                .filter_map(|idx| self.graph.node_weight(idx).map(|id| (id.clone(), 0.0)))
344                .collect();
345        }
346
347        let indices: Vec<NodeIndex> = self.graph.node_indices().collect();
348        let idx_pos: HashMap<NodeIndex, usize> = indices
349            .iter()
350            .enumerate()
351            .map(|(i, &idx)| (idx, i))
352            .collect();
353
354        let mut centrality = vec![0.0f64; n];
355
356        // Determine source nodes (sample for large graphs)
357        let sources: Vec<usize> = if n > 1000 {
358            let sample_size = (n as f64).sqrt() as usize;
359            // Deterministic sampling: evenly spaced
360            let step = n / sample_size;
361            (0..sample_size).map(|i| i * step).collect()
362        } else {
363            (0..n).collect()
364        };
365
366        let scale = if n > 1000 {
367            n as f64 / sources.len() as f64
368        } else {
369            1.0
370        };
371
372        for &s in &sources {
373            // Brandes' algorithm from source s
374            let mut stack: Vec<usize> = Vec::new();
375            let mut predecessors: Vec<Vec<usize>> = vec![Vec::new(); n];
376            let mut sigma = vec![0.0f64; n]; // number of shortest paths
377            sigma[s] = 1.0;
378            let mut dist: Vec<i64> = vec![-1; n];
379            dist[s] = 0;
380
381            let mut queue: VecDeque<usize> = VecDeque::new();
382            queue.push_back(s);
383
384            while let Some(v) = queue.pop_front() {
385                stack.push(v);
386                let v_idx = indices[v];
387                for neighbor in self.graph.neighbors_directed(v_idx, Direction::Outgoing) {
388                    if let Some(&w) = idx_pos.get(&neighbor) {
389                        if dist[w] < 0 {
390                            dist[w] = dist[v] + 1;
391                            queue.push_back(w);
392                        }
393                        if dist[w] == dist[v] + 1 {
394                            sigma[w] += sigma[v];
395                            predecessors[w].push(v);
396                        }
397                    }
398                }
399            }
400
401            let mut delta = vec![0.0f64; n];
402            while let Some(w) = stack.pop() {
403                for &v in &predecessors[w] {
404                    delta[v] += (sigma[v] / sigma[w]) * (1.0 + delta[w]);
405                }
406                if w != s {
407                    centrality[w] += delta[w];
408                }
409            }
410        }
411
412        // Apply sampling scale and normalize
413        let norm = ((n - 1) * (n - 2)) as f64;
414        indices
415            .iter()
416            .enumerate()
417            .filter_map(|(i, &idx)| {
418                self.graph
419                    .node_weight(idx)
420                    .map(|id| (id.clone(), centrality[i] * scale / norm))
421            })
422            .collect()
423    }
424
425    /// Find all strongly connected components using Tarjan's algorithm.
426    ///
427    /// Returns groups of node IDs. Each group is a strongly connected component
428    /// where every node can reach every other node via directed edges.
429    pub fn strongly_connected_components(&self) -> Vec<Vec<String>> {
430        let sccs = petgraph::algo::tarjan_scc(&self.graph);
431
432        let mut result: Vec<Vec<String>> = sccs
433            .into_iter()
434            .map(|component| {
435                let mut ids: Vec<String> = component
436                    .into_iter()
437                    .filter_map(|idx| self.graph.node_weight(idx).cloned())
438                    .collect();
439                ids.sort();
440                ids
441            })
442            .collect();
443
444        result.sort();
445        result
446    }
447
448    /// Compute topological layers using Kahn's algorithm.
449    ///
450    /// Returns layers where all nodes in layer i have no dependencies on nodes
451    /// in layer i or later. For cyclic graphs, SCCs are condensed into single
452    /// super-nodes first, then the resulting DAG is topologically sorted.
453    ///
454    /// Each inner Vec contains the node IDs at that layer.
455    pub fn topological_layers(&self) -> Vec<Vec<String>> {
456        let n = self.graph.node_count();
457        if n == 0 {
458            return Vec::new();
459        }
460
461        let indices: Vec<NodeIndex> = self.graph.node_indices().collect();
462        let idx_pos: HashMap<NodeIndex, usize> = indices
463            .iter()
464            .enumerate()
465            .map(|(i, &idx)| (idx, i))
466            .collect();
467
468        // Step 1: Find SCCs
469        let sccs = petgraph::algo::tarjan_scc(&self.graph);
470
471        // Map each node position to its SCC index
472        let mut node_to_scc = vec![0usize; n];
473        for (scc_idx, scc) in sccs.iter().enumerate() {
474            for &node_idx in scc {
475                if let Some(&pos) = idx_pos.get(&node_idx) {
476                    node_to_scc[pos] = scc_idx;
477                }
478            }
479        }
480
481        let num_sccs = sccs.len();
482
483        // Step 2: Build condensed DAG (SCC graph)
484        let mut condensed_adj: Vec<HashSet<usize>> = vec![HashSet::new(); num_sccs];
485        let mut condensed_in_degree = vec![0usize; num_sccs];
486
487        for &idx in &indices {
488            if let Some(&src_pos) = idx_pos.get(&idx) {
489                let src_scc = node_to_scc[src_pos];
490                for neighbor in self.graph.neighbors_directed(idx, Direction::Outgoing) {
491                    if let Some(&dst_pos) = idx_pos.get(&neighbor) {
492                        let dst_scc = node_to_scc[dst_pos];
493                        if src_scc != dst_scc && condensed_adj[src_scc].insert(dst_scc) {
494                            condensed_in_degree[dst_scc] += 1;
495                        }
496                    }
497                }
498            }
499        }
500
501        // Step 3: Kahn's algorithm on the condensed DAG
502        let mut queue: VecDeque<usize> = VecDeque::new();
503        for (i, &deg) in condensed_in_degree.iter().enumerate().take(num_sccs) {
504            if deg == 0 {
505                queue.push_back(i);
506            }
507        }
508
509        let mut scc_layers: Vec<Vec<usize>> = Vec::new();
510        while !queue.is_empty() {
511            let mut layer = Vec::new();
512            let mut next_queue = VecDeque::new();
513
514            while let Some(scc_idx) = queue.pop_front() {
515                layer.push(scc_idx);
516                for &neighbor_scc in &condensed_adj[scc_idx] {
517                    condensed_in_degree[neighbor_scc] -= 1;
518                    if condensed_in_degree[neighbor_scc] == 0 {
519                        next_queue.push_back(neighbor_scc);
520                    }
521                }
522            }
523
524            scc_layers.push(layer);
525            queue = next_queue;
526        }
527
528        // Step 4: Expand SCC layers back to node IDs
529        let mut result: Vec<Vec<String>> = Vec::new();
530        for scc_layer in scc_layers {
531            let mut layer_nodes: Vec<String> = Vec::new();
532            for scc_idx in scc_layer {
533                for &node_idx in &sccs[scc_idx] {
534                    if let Some(id) = self.graph.node_weight(node_idx) {
535                        layer_nodes.push(id.clone());
536                    }
537                }
538            }
539            layer_nodes.sort();
540            result.push(layer_nodes);
541        }
542
543        result
544    }
545
546    /// Return top-N nodes by centrality and edges between them.
547    /// Optionally filter by namespace and/or node kinds.
548    pub fn subgraph_top_n(
549        &self,
550        n: usize,
551        namespace: Option<&str>,
552        kinds: Option<&[NodeKind]>,
553    ) -> (Vec<GraphNode>, Vec<Edge>) {
554        let mut candidates: Vec<&GraphNode> = self
555            .nodes
556            .values()
557            .filter(|node| {
558                if let Some(ns) = namespace {
559                    match &node.namespace {
560                        Some(node_ns) => node_ns == ns,
561                        None => false,
562                    }
563                } else {
564                    true
565                }
566            })
567            .filter(|node| {
568                if let Some(k) = kinds {
569                    k.contains(&node.kind)
570                } else {
571                    true
572                }
573            })
574            .collect();
575
576        // Sort by centrality descending
577        candidates.sort_by(|a, b| {
578            b.centrality
579                .partial_cmp(&a.centrality)
580                .unwrap_or(std::cmp::Ordering::Equal)
581        });
582
583        // Take top N
584        candidates.truncate(n);
585
586        let top_ids: HashSet<&str> = candidates.iter().map(|node| node.id.as_str()).collect();
587        let nodes_vec: Vec<GraphNode> = candidates.into_iter().cloned().collect();
588
589        // Collect edges where both src and dst are in the top-N set
590        let edges_vec: Vec<Edge> = self
591            .edges
592            .values()
593            .filter(|edge| {
594                top_ids.contains(edge.src.as_str()) && top_ids.contains(edge.dst.as_str())
595            })
596            .cloned()
597            .collect();
598
599        (nodes_vec, edges_vec)
600    }
601
602    /// Return node-to-community-ID mapping for Louvain.
603    pub fn louvain_with_assignment(&self, resolution: f64) -> HashMap<String, usize> {
604        let communities = self.louvain_communities(resolution);
605        let mut assignment = HashMap::new();
606        for (idx, community) in communities.into_iter().enumerate() {
607            for node_id in community {
608                assignment.insert(node_id, idx);
609            }
610        }
611        assignment
612    }
613}
614
615#[cfg(test)]
616#[path = "tests/algorithms_tests.rs"]
617mod tests;