Skip to main content

drft/analyses/
pagerank.rs

1use super::{Analysis, AnalysisContext};
2use std::collections::HashMap;
3
4#[derive(Debug, Clone, serde::Serialize)]
5pub struct NodePageRank {
6    pub node: String,
7    pub score: f64,
8}
9
10#[derive(Debug, Clone, serde::Serialize)]
11pub struct PageRankResult {
12    pub iterations: usize,
13    pub converged: bool,
14    pub nodes: Vec<NodePageRank>,
15}
16
17pub struct PageRank;
18
19const DAMPING: f64 = 0.85;
20const MAX_ITERATIONS: usize = 100;
21const EPSILON: f64 = 1e-6;
22
23impl Analysis for PageRank {
24    type Output = PageRankResult;
25
26    fn name(&self) -> &str {
27        "pagerank"
28    }
29
30    fn run(&self, ctx: &AnalysisContext) -> PageRankResult {
31        let graph = ctx.graph;
32        let real_nodes: Vec<&str> = graph
33            .nodes
34            .keys()
35            .filter(|p| graph.is_file_node(p))
36            .map(|s| s.as_str())
37            .collect();
38
39        let n = real_nodes.len();
40        if n == 0 {
41            return PageRankResult {
42                iterations: 0,
43                converged: true,
44                nodes: Vec::new(),
45            };
46        }
47
48        // Build adjacency among real nodes only
49        let mut out_degree: HashMap<&str, usize> = HashMap::new();
50        let mut inbound: HashMap<&str, Vec<&str>> = HashMap::new();
51
52        for &node in &real_nodes {
53            out_degree.insert(node, 0);
54            inbound.insert(node, Vec::new());
55        }
56
57        for edge in &graph.edges {
58            if graph.is_file_node(&edge.source)
59                && graph.is_file_node(&edge.target)
60                && edge.source != edge.target
61            {
62                *out_degree.get_mut(edge.source.as_str()).unwrap() += 1;
63                inbound
64                    .get_mut(edge.target.as_str())
65                    .unwrap()
66                    .push(edge.source.as_str());
67            }
68        }
69
70        let init = 1.0 / n as f64;
71        let mut rank: HashMap<&str, f64> = HashMap::new();
72        for &node in &real_nodes {
73            rank.insert(node, init);
74        }
75
76        // Identify dangling nodes (out-degree 0)
77        let dangling: Vec<&str> = real_nodes
78            .iter()
79            .filter(|&&node| out_degree[node] == 0)
80            .copied()
81            .collect();
82
83        let mut iterations = 0;
84        let mut converged = false;
85
86        for _ in 0..MAX_ITERATIONS {
87            iterations += 1;
88
89            // Dangling node contribution distributed evenly
90            let dangling_sum: f64 = dangling.iter().map(|&node| rank[node]).sum();
91
92            let mut new_rank: HashMap<&str, f64> = HashMap::new();
93            let base = (1.0 - DAMPING) / n as f64 + DAMPING * dangling_sum / n as f64;
94
95            for &node in &real_nodes {
96                let mut incoming_sum = 0.0;
97                for &pred in &inbound[node] {
98                    incoming_sum += rank[pred] / out_degree[pred] as f64;
99                }
100                new_rank.insert(node, base + DAMPING * incoming_sum);
101            }
102
103            // Check convergence (L1 norm)
104            let diff: f64 = real_nodes
105                .iter()
106                .map(|&node| (new_rank[node] - rank[node]).abs())
107                .sum();
108
109            rank = new_rank;
110
111            if diff < EPSILON {
112                converged = true;
113                break;
114            }
115        }
116
117        let mut nodes: Vec<NodePageRank> = rank
118            .into_iter()
119            .map(|(node, score)| NodePageRank {
120                node: node.to_string(),
121                score,
122            })
123            .collect();
124
125        // Sort by score descending, then node ascending
126        nodes.sort_by(|a, b| {
127            b.score
128                .partial_cmp(&a.score)
129                .unwrap()
130                .then_with(|| a.node.cmp(&b.node))
131        });
132
133        PageRankResult {
134            iterations,
135            converged,
136            nodes,
137        }
138    }
139}
140
141#[cfg(test)]
142mod tests {
143    use super::*;
144    use crate::analyses::AnalysisContext;
145    use crate::config::Config;
146    use crate::graph::Graph;
147    use crate::graph::test_helpers::{make_edge, make_node};
148    use std::path::Path;
149
150    fn make_ctx<'a>(graph: &'a Graph, config: &'a Config) -> AnalysisContext<'a> {
151        AnalysisContext {
152            graph,
153            root: Path::new("."),
154            config,
155            lockfile: None,
156        }
157    }
158
159    #[test]
160    fn single_node() {
161        let mut graph = Graph::new();
162        graph.add_node(make_node("a.md"));
163
164        let config = Config::defaults();
165        let result = PageRank.run(&make_ctx(&graph, &config));
166        assert!(result.converged);
167        assert_eq!(result.nodes.len(), 1);
168        assert!((result.nodes[0].score - 1.0).abs() < 1e-4);
169    }
170
171    #[test]
172    fn two_nodes_with_link() {
173        let mut graph = Graph::new();
174        graph.add_node(make_node("a.md"));
175        graph.add_node(make_node("b.md"));
176        graph.add_edge(make_edge("a.md", "b.md"));
177
178        let config = Config::defaults();
179        let result = PageRank.run(&make_ctx(&graph, &config));
180        assert!(result.converged);
181        let a = result.nodes.iter().find(|n| n.node == "a.md").unwrap();
182        let b = result.nodes.iter().find(|n| n.node == "b.md").unwrap();
183        assert!(b.score > a.score);
184    }
185
186    #[test]
187    fn scores_sum_to_one() {
188        let mut graph = Graph::new();
189        graph.add_node(make_node("a.md"));
190        graph.add_node(make_node("b.md"));
191        graph.add_node(make_node("c.md"));
192        graph.add_edge(make_edge("a.md", "b.md"));
193        graph.add_edge(make_edge("b.md", "c.md"));
194        graph.add_edge(make_edge("c.md", "a.md"));
195
196        let config = Config::defaults();
197        let result = PageRank.run(&make_ctx(&graph, &config));
198        let sum: f64 = result.nodes.iter().map(|n| n.score).sum();
199        assert!((sum - 1.0).abs() < 1e-4);
200    }
201
202    #[test]
203    fn empty_graph() {
204        let graph = Graph::new();
205        let config = Config::defaults();
206        let result = PageRank.run(&make_ctx(&graph, &config));
207        assert!(result.converged);
208        assert!(result.nodes.is_empty());
209    }
210
211    #[test]
212    fn dangling_node() {
213        let mut graph = Graph::new();
214        graph.add_node(make_node("a.md"));
215        graph.add_node(make_node("b.md"));
216        graph.add_node(make_node("c.md"));
217        graph.add_edge(make_edge("a.md", "b.md"));
218
219        let config = Config::defaults();
220        let result = PageRank.run(&make_ctx(&graph, &config));
221        assert!(result.converged);
222        let sum: f64 = result.nodes.iter().map(|n| n.score).sum();
223        assert!((sum - 1.0).abs() < 1e-4);
224    }
225}