Skip to main content

drft/analyses/
pagerank.rs

1use super::{Analysis, AnalysisContext};
2use std::collections::HashMap;
3
4#[derive(Debug, Clone, serde::Serialize)]
5pub struct NodePageRank {
6    pub node: String,
7    pub score: f64,
8}
9
10#[derive(Debug, Clone, serde::Serialize)]
11pub struct PageRankResult {
12    pub iterations: usize,
13    pub converged: bool,
14    pub nodes: Vec<NodePageRank>,
15}
16
17pub struct PageRank;
18
19const DAMPING: f64 = 0.85;
20const MAX_ITERATIONS: usize = 100;
21const EPSILON: f64 = 1e-6;
22
23impl Analysis for PageRank {
24    type Output = PageRankResult;
25
26    fn name(&self) -> &str {
27        "pagerank"
28    }
29
30    fn run(&self, ctx: &AnalysisContext) -> PageRankResult {
31        let graph = ctx.graph;
32        let real_nodes: Vec<&str> = graph
33            .nodes
34            .keys()
35            .filter(|p| graph.is_included_node(p))
36            .map(|s| s.as_str())
37            .collect();
38
39        let n = real_nodes.len();
40        if n == 0 {
41            return PageRankResult {
42                iterations: 0,
43                converged: true,
44                nodes: Vec::new(),
45            };
46        }
47
48        // Build adjacency among real nodes only
49        let mut out_degree: HashMap<&str, usize> = HashMap::new();
50        let mut inbound: HashMap<&str, Vec<&str>> = HashMap::new();
51
52        for &node in &real_nodes {
53            out_degree.insert(node, 0);
54            inbound.insert(node, Vec::new());
55        }
56
57        for edge in &graph.edges {
58            if graph.is_internal_edge(edge) && edge.source != edge.target {
59                *out_degree.get_mut(edge.source.as_str()).unwrap() += 1;
60                inbound
61                    .get_mut(edge.target.as_str())
62                    .unwrap()
63                    .push(edge.source.as_str());
64            }
65        }
66
67        let init = 1.0 / n as f64;
68        let mut rank: HashMap<&str, f64> = HashMap::new();
69        for &node in &real_nodes {
70            rank.insert(node, init);
71        }
72
73        // Identify dangling nodes (out-degree 0)
74        let dangling: Vec<&str> = real_nodes
75            .iter()
76            .filter(|&&node| out_degree[node] == 0)
77            .copied()
78            .collect();
79
80        let mut iterations = 0;
81        let mut converged = false;
82
83        for _ in 0..MAX_ITERATIONS {
84            iterations += 1;
85
86            // Dangling node contribution distributed evenly
87            let dangling_sum: f64 = dangling.iter().map(|&node| rank[node]).sum();
88
89            let mut new_rank: HashMap<&str, f64> = HashMap::new();
90            let base = (1.0 - DAMPING) / n as f64 + DAMPING * dangling_sum / n as f64;
91
92            for &node in &real_nodes {
93                let mut incoming_sum = 0.0;
94                for &pred in &inbound[node] {
95                    incoming_sum += rank[pred] / out_degree[pred] as f64;
96                }
97                new_rank.insert(node, base + DAMPING * incoming_sum);
98            }
99
100            // Check convergence (L1 norm)
101            let diff: f64 = real_nodes
102                .iter()
103                .map(|&node| (new_rank[node] - rank[node]).abs())
104                .sum();
105
106            rank = new_rank;
107
108            if diff < EPSILON {
109                converged = true;
110                break;
111            }
112        }
113
114        let mut nodes: Vec<NodePageRank> = rank
115            .into_iter()
116            .map(|(node, score)| NodePageRank {
117                node: node.to_string(),
118                score,
119            })
120            .collect();
121
122        // Sort by score descending, then node ascending
123        nodes.sort_by(|a, b| {
124            b.score
125                .partial_cmp(&a.score)
126                .unwrap()
127                .then_with(|| a.node.cmp(&b.node))
128        });
129
130        PageRankResult {
131            iterations,
132            converged,
133            nodes,
134        }
135    }
136}
137
138#[cfg(test)]
139mod tests {
140    use super::*;
141    use crate::analyses::AnalysisContext;
142    use crate::config::Config;
143    use crate::graph::Graph;
144    use crate::graph::test_helpers::{make_edge, make_node};
145    use std::path::Path;
146
147    fn make_ctx<'a>(graph: &'a Graph, config: &'a Config) -> AnalysisContext<'a> {
148        AnalysisContext {
149            graph,
150            root: Path::new("."),
151            config,
152            lockfile: None,
153        }
154    }
155
156    #[test]
157    fn single_node() {
158        let mut graph = Graph::new();
159        graph.add_node(make_node("a.md"));
160
161        let config = Config::defaults();
162        let result = PageRank.run(&make_ctx(&graph, &config));
163        assert!(result.converged);
164        assert_eq!(result.nodes.len(), 1);
165        assert!((result.nodes[0].score - 1.0).abs() < 1e-4);
166    }
167
168    #[test]
169    fn two_nodes_with_link() {
170        let mut graph = Graph::new();
171        graph.add_node(make_node("a.md"));
172        graph.add_node(make_node("b.md"));
173        graph.add_edge(make_edge("a.md", "b.md"));
174
175        let config = Config::defaults();
176        let result = PageRank.run(&make_ctx(&graph, &config));
177        assert!(result.converged);
178        let a = result.nodes.iter().find(|n| n.node == "a.md").unwrap();
179        let b = result.nodes.iter().find(|n| n.node == "b.md").unwrap();
180        assert!(b.score > a.score);
181    }
182
183    #[test]
184    fn scores_sum_to_one() {
185        let mut graph = Graph::new();
186        graph.add_node(make_node("a.md"));
187        graph.add_node(make_node("b.md"));
188        graph.add_node(make_node("c.md"));
189        graph.add_edge(make_edge("a.md", "b.md"));
190        graph.add_edge(make_edge("b.md", "c.md"));
191        graph.add_edge(make_edge("c.md", "a.md"));
192
193        let config = Config::defaults();
194        let result = PageRank.run(&make_ctx(&graph, &config));
195        let sum: f64 = result.nodes.iter().map(|n| n.score).sum();
196        assert!((sum - 1.0).abs() < 1e-4);
197    }
198
199    #[test]
200    fn empty_graph() {
201        let graph = Graph::new();
202        let config = Config::defaults();
203        let result = PageRank.run(&make_ctx(&graph, &config));
204        assert!(result.converged);
205        assert!(result.nodes.is_empty());
206    }
207
208    #[test]
209    fn dangling_node() {
210        let mut graph = Graph::new();
211        graph.add_node(make_node("a.md"));
212        graph.add_node(make_node("b.md"));
213        graph.add_node(make_node("c.md"));
214        graph.add_edge(make_edge("a.md", "b.md"));
215
216        let config = Config::defaults();
217        let result = PageRank.run(&make_ctx(&graph, &config));
218        assert!(result.converged);
219        let sum: f64 = result.nodes.iter().map(|n| n.score).sum();
220        assert!((sum - 1.0).abs() < 1e-4);
221    }
222}