Skip to main content

drft/analyses/
pagerank.rs

1use super::{Analysis, AnalysisContext};
2use std::collections::HashMap;
3
4#[derive(Debug, Clone, serde::Serialize)]
5pub struct NodePageRank {
6    pub node: String,
7    pub score: f64,
8}
9
10#[derive(Debug, Clone, serde::Serialize)]
11pub struct PageRankResult {
12    pub iterations: usize,
13    pub converged: bool,
14    pub nodes: Vec<NodePageRank>,
15}
16
17pub struct PageRank;
18
19const DAMPING: f64 = 0.85;
20const MAX_ITERATIONS: usize = 100;
21const EPSILON: f64 = 1e-6;
22
23impl Analysis for PageRank {
24    type Output = PageRankResult;
25
26    fn name(&self) -> &str {
27        "pagerank"
28    }
29
30    fn run(&self, ctx: &AnalysisContext) -> PageRankResult {
31        let graph = ctx.graph;
32        let real_nodes: Vec<&str> = graph.included_nodes().map(|(s, _)| s.as_str()).collect();
33
34        let n = real_nodes.len();
35        if n == 0 {
36            return PageRankResult {
37                iterations: 0,
38                converged: true,
39                nodes: Vec::new(),
40            };
41        }
42
43        // Build adjacency among real nodes only
44        let mut out_degree: HashMap<&str, usize> = HashMap::new();
45        let mut inbound: HashMap<&str, Vec<&str>> = HashMap::new();
46
47        for &node in &real_nodes {
48            out_degree.insert(node, 0);
49            inbound.insert(node, Vec::new());
50        }
51
52        for edge in &graph.edges {
53            if graph.is_internal_edge(edge) && edge.source != edge.target {
54                *out_degree.get_mut(edge.source.as_str()).unwrap() += 1;
55                inbound
56                    .get_mut(edge.target.as_str())
57                    .unwrap()
58                    .push(edge.source.as_str());
59            }
60        }
61
62        let init = 1.0 / n as f64;
63        let mut rank: HashMap<&str, f64> = HashMap::new();
64        for &node in &real_nodes {
65            rank.insert(node, init);
66        }
67
68        // Identify dangling nodes (out-degree 0)
69        let dangling: Vec<&str> = real_nodes
70            .iter()
71            .filter(|&&node| out_degree[node] == 0)
72            .copied()
73            .collect();
74
75        let mut iterations = 0;
76        let mut converged = false;
77
78        for _ in 0..MAX_ITERATIONS {
79            iterations += 1;
80
81            // Dangling node contribution distributed evenly
82            let dangling_sum: f64 = dangling.iter().map(|&node| rank[node]).sum();
83
84            let mut new_rank: HashMap<&str, f64> = HashMap::new();
85            let base = (1.0 - DAMPING) / n as f64 + DAMPING * dangling_sum / n as f64;
86
87            for &node in &real_nodes {
88                let mut incoming_sum = 0.0;
89                for &pred in &inbound[node] {
90                    incoming_sum += rank[pred] / out_degree[pred] as f64;
91                }
92                new_rank.insert(node, base + DAMPING * incoming_sum);
93            }
94
95            // Check convergence (L1 norm)
96            let diff: f64 = real_nodes
97                .iter()
98                .map(|&node| (new_rank[node] - rank[node]).abs())
99                .sum();
100
101            rank = new_rank;
102
103            if diff < EPSILON {
104                converged = true;
105                break;
106            }
107        }
108
109        let mut nodes: Vec<NodePageRank> = rank
110            .into_iter()
111            .map(|(node, score)| NodePageRank {
112                node: node.to_string(),
113                score,
114            })
115            .collect();
116
117        // Sort by score descending, then node ascending
118        nodes.sort_by(|a, b| {
119            b.score
120                .partial_cmp(&a.score)
121                .unwrap()
122                .then_with(|| a.node.cmp(&b.node))
123        });
124
125        PageRankResult {
126            iterations,
127            converged,
128            nodes,
129        }
130    }
131}
132
133#[cfg(test)]
134mod tests {
135    use super::*;
136    use crate::analyses::AnalysisContext;
137    use crate::config::Config;
138    use crate::graph::Graph;
139    use crate::graph::test_helpers::{make_edge, make_node};
140    use std::path::Path;
141
142    fn make_ctx<'a>(graph: &'a Graph, config: &'a Config) -> AnalysisContext<'a> {
143        AnalysisContext {
144            graph,
145            root: Path::new("."),
146            config,
147            lockfile: None,
148        }
149    }
150
151    #[test]
152    fn single_node() {
153        let mut graph = Graph::new();
154        graph.add_node(make_node("a.md"));
155
156        let config = Config::defaults();
157        let result = PageRank.run(&make_ctx(&graph, &config));
158        assert!(result.converged);
159        assert_eq!(result.nodes.len(), 1);
160        assert!((result.nodes[0].score - 1.0).abs() < 1e-4);
161    }
162
163    #[test]
164    fn two_nodes_with_link() {
165        let mut graph = Graph::new();
166        graph.add_node(make_node("a.md"));
167        graph.add_node(make_node("b.md"));
168        graph.add_edge(make_edge("a.md", "b.md"));
169
170        let config = Config::defaults();
171        let result = PageRank.run(&make_ctx(&graph, &config));
172        assert!(result.converged);
173        let a = result.nodes.iter().find(|n| n.node == "a.md").unwrap();
174        let b = result.nodes.iter().find(|n| n.node == "b.md").unwrap();
175        assert!(b.score > a.score);
176    }
177
178    #[test]
179    fn scores_sum_to_one() {
180        let mut graph = Graph::new();
181        graph.add_node(make_node("a.md"));
182        graph.add_node(make_node("b.md"));
183        graph.add_node(make_node("c.md"));
184        graph.add_edge(make_edge("a.md", "b.md"));
185        graph.add_edge(make_edge("b.md", "c.md"));
186        graph.add_edge(make_edge("c.md", "a.md"));
187
188        let config = Config::defaults();
189        let result = PageRank.run(&make_ctx(&graph, &config));
190        let sum: f64 = result.nodes.iter().map(|n| n.score).sum();
191        assert!((sum - 1.0).abs() < 1e-4);
192    }
193
194    #[test]
195    fn empty_graph() {
196        let graph = Graph::new();
197        let config = Config::defaults();
198        let result = PageRank.run(&make_ctx(&graph, &config));
199        assert!(result.converged);
200        assert!(result.nodes.is_empty());
201    }
202
203    #[test]
204    fn dangling_node() {
205        let mut graph = Graph::new();
206        graph.add_node(make_node("a.md"));
207        graph.add_node(make_node("b.md"));
208        graph.add_node(make_node("c.md"));
209        graph.add_edge(make_edge("a.md", "b.md"));
210
211        let config = Config::defaults();
212        let result = PageRank.run(&make_ctx(&graph, &config));
213        assert!(result.converged);
214        let sum: f64 = result.nodes.iter().map(|n| n.score).sum();
215        assert!((sum - 1.0).abs() < 1e-4);
216    }
217}