drft/analyses/
pagerank.rs1use super::{Analysis, AnalysisContext};
2use std::collections::HashMap;
3
4#[derive(Debug, Clone, serde::Serialize)]
5pub struct NodePageRank {
6 pub node: String,
7 pub score: f64,
8}
9
10#[derive(Debug, Clone, serde::Serialize)]
11pub struct PageRankResult {
12 pub iterations: usize,
13 pub converged: bool,
14 pub nodes: Vec<NodePageRank>,
15}
16
17pub struct PageRank;
18
19const DAMPING: f64 = 0.85;
20const MAX_ITERATIONS: usize = 100;
21const EPSILON: f64 = 1e-6;
22
23impl Analysis for PageRank {
24 type Output = PageRankResult;
25
26 fn name(&self) -> &str {
27 "pagerank"
28 }
29
30 fn run(&self, ctx: &AnalysisContext) -> PageRankResult {
31 let graph = ctx.graph;
32 let real_nodes: Vec<&str> = graph
33 .nodes
34 .keys()
35 .filter(|p| graph.is_file_node(p))
36 .map(|s| s.as_str())
37 .collect();
38
39 let n = real_nodes.len();
40 if n == 0 {
41 return PageRankResult {
42 iterations: 0,
43 converged: true,
44 nodes: Vec::new(),
45 };
46 }
47
48 let mut out_degree: HashMap<&str, usize> = HashMap::new();
50 let mut inbound: HashMap<&str, Vec<&str>> = HashMap::new();
51
52 for &node in &real_nodes {
53 out_degree.insert(node, 0);
54 inbound.insert(node, Vec::new());
55 }
56
57 for edge in &graph.edges {
58 if graph.is_file_node(&edge.source)
59 && graph.is_file_node(&edge.target)
60 && edge.source != edge.target
61 {
62 *out_degree.get_mut(edge.source.as_str()).unwrap() += 1;
63 inbound
64 .get_mut(edge.target.as_str())
65 .unwrap()
66 .push(edge.source.as_str());
67 }
68 }
69
70 let init = 1.0 / n as f64;
71 let mut rank: HashMap<&str, f64> = HashMap::new();
72 for &node in &real_nodes {
73 rank.insert(node, init);
74 }
75
76 let dangling: Vec<&str> = real_nodes
78 .iter()
79 .filter(|&&node| out_degree[node] == 0)
80 .copied()
81 .collect();
82
83 let mut iterations = 0;
84 let mut converged = false;
85
86 for _ in 0..MAX_ITERATIONS {
87 iterations += 1;
88
89 let dangling_sum: f64 = dangling.iter().map(|&node| rank[node]).sum();
91
92 let mut new_rank: HashMap<&str, f64> = HashMap::new();
93 let base = (1.0 - DAMPING) / n as f64 + DAMPING * dangling_sum / n as f64;
94
95 for &node in &real_nodes {
96 let mut incoming_sum = 0.0;
97 for &pred in &inbound[node] {
98 incoming_sum += rank[pred] / out_degree[pred] as f64;
99 }
100 new_rank.insert(node, base + DAMPING * incoming_sum);
101 }
102
103 let diff: f64 = real_nodes
105 .iter()
106 .map(|&node| (new_rank[node] - rank[node]).abs())
107 .sum();
108
109 rank = new_rank;
110
111 if diff < EPSILON {
112 converged = true;
113 break;
114 }
115 }
116
117 let mut nodes: Vec<NodePageRank> = rank
118 .into_iter()
119 .map(|(node, score)| NodePageRank {
120 node: node.to_string(),
121 score,
122 })
123 .collect();
124
125 nodes.sort_by(|a, b| {
127 b.score
128 .partial_cmp(&a.score)
129 .unwrap()
130 .then_with(|| a.node.cmp(&b.node))
131 });
132
133 PageRankResult {
134 iterations,
135 converged,
136 nodes,
137 }
138 }
139}
140
141#[cfg(test)]
142mod tests {
143 use super::*;
144 use crate::analyses::AnalysisContext;
145 use crate::config::Config;
146 use crate::graph::Graph;
147 use crate::graph::test_helpers::{make_edge, make_node};
148 use std::path::Path;
149
150 fn make_ctx<'a>(graph: &'a Graph, config: &'a Config) -> AnalysisContext<'a> {
151 AnalysisContext {
152 graph,
153 root: Path::new("."),
154 config,
155 lockfile: None,
156 }
157 }
158
159 #[test]
160 fn single_node() {
161 let mut graph = Graph::new();
162 graph.add_node(make_node("a.md"));
163
164 let config = Config::defaults();
165 let result = PageRank.run(&make_ctx(&graph, &config));
166 assert!(result.converged);
167 assert_eq!(result.nodes.len(), 1);
168 assert!((result.nodes[0].score - 1.0).abs() < 1e-4);
169 }
170
171 #[test]
172 fn two_nodes_with_link() {
173 let mut graph = Graph::new();
174 graph.add_node(make_node("a.md"));
175 graph.add_node(make_node("b.md"));
176 graph.add_edge(make_edge("a.md", "b.md"));
177
178 let config = Config::defaults();
179 let result = PageRank.run(&make_ctx(&graph, &config));
180 assert!(result.converged);
181 let a = result.nodes.iter().find(|n| n.node == "a.md").unwrap();
182 let b = result.nodes.iter().find(|n| n.node == "b.md").unwrap();
183 assert!(b.score > a.score);
184 }
185
186 #[test]
187 fn scores_sum_to_one() {
188 let mut graph = Graph::new();
189 graph.add_node(make_node("a.md"));
190 graph.add_node(make_node("b.md"));
191 graph.add_node(make_node("c.md"));
192 graph.add_edge(make_edge("a.md", "b.md"));
193 graph.add_edge(make_edge("b.md", "c.md"));
194 graph.add_edge(make_edge("c.md", "a.md"));
195
196 let config = Config::defaults();
197 let result = PageRank.run(&make_ctx(&graph, &config));
198 let sum: f64 = result.nodes.iter().map(|n| n.score).sum();
199 assert!((sum - 1.0).abs() < 1e-4);
200 }
201
202 #[test]
203 fn empty_graph() {
204 let graph = Graph::new();
205 let config = Config::defaults();
206 let result = PageRank.run(&make_ctx(&graph, &config));
207 assert!(result.converged);
208 assert!(result.nodes.is_empty());
209 }
210
211 #[test]
212 fn dangling_node() {
213 let mut graph = Graph::new();
214 graph.add_node(make_node("a.md"));
215 graph.add_node(make_node("b.md"));
216 graph.add_node(make_node("c.md"));
217 graph.add_edge(make_edge("a.md", "b.md"));
218
219 let config = Config::defaults();
220 let result = PageRank.run(&make_ctx(&graph, &config));
221 assert!(result.converged);
222 let sum: f64 = result.nodes.iter().map(|n| n.score).sum();
223 assert!((sum - 1.0).abs() < 1e-4);
224 }
225}