drft/analyses/
pagerank.rs1use super::{Analysis, AnalysisContext};
2use std::collections::HashMap;
3
4#[derive(Debug, Clone, serde::Serialize)]
5pub struct NodePageRank {
6 pub node: String,
7 pub score: f64,
8}
9
10#[derive(Debug, Clone, serde::Serialize)]
11pub struct PageRankResult {
12 pub iterations: usize,
13 pub converged: bool,
14 pub nodes: Vec<NodePageRank>,
15}
16
17pub struct PageRank;
18
19const DAMPING: f64 = 0.85;
20const MAX_ITERATIONS: usize = 100;
21const EPSILON: f64 = 1e-6;
22
23impl Analysis for PageRank {
24 type Output = PageRankResult;
25
26 fn name(&self) -> &str {
27 "pagerank"
28 }
29
30 fn run(&self, ctx: &AnalysisContext) -> PageRankResult {
31 let graph = ctx.graph;
32 let real_nodes: Vec<&str> = graph
33 .nodes
34 .keys()
35 .filter(|p| graph.is_included_node(p))
36 .map(|s| s.as_str())
37 .collect();
38
39 let n = real_nodes.len();
40 if n == 0 {
41 return PageRankResult {
42 iterations: 0,
43 converged: true,
44 nodes: Vec::new(),
45 };
46 }
47
48 let mut out_degree: HashMap<&str, usize> = HashMap::new();
50 let mut inbound: HashMap<&str, Vec<&str>> = HashMap::new();
51
52 for &node in &real_nodes {
53 out_degree.insert(node, 0);
54 inbound.insert(node, Vec::new());
55 }
56
57 for edge in &graph.edges {
58 if graph.is_internal_edge(edge) && edge.source != edge.target {
59 *out_degree.get_mut(edge.source.as_str()).unwrap() += 1;
60 inbound
61 .get_mut(edge.target.as_str())
62 .unwrap()
63 .push(edge.source.as_str());
64 }
65 }
66
67 let init = 1.0 / n as f64;
68 let mut rank: HashMap<&str, f64> = HashMap::new();
69 for &node in &real_nodes {
70 rank.insert(node, init);
71 }
72
73 let dangling: Vec<&str> = real_nodes
75 .iter()
76 .filter(|&&node| out_degree[node] == 0)
77 .copied()
78 .collect();
79
80 let mut iterations = 0;
81 let mut converged = false;
82
83 for _ in 0..MAX_ITERATIONS {
84 iterations += 1;
85
86 let dangling_sum: f64 = dangling.iter().map(|&node| rank[node]).sum();
88
89 let mut new_rank: HashMap<&str, f64> = HashMap::new();
90 let base = (1.0 - DAMPING) / n as f64 + DAMPING * dangling_sum / n as f64;
91
92 for &node in &real_nodes {
93 let mut incoming_sum = 0.0;
94 for &pred in &inbound[node] {
95 incoming_sum += rank[pred] / out_degree[pred] as f64;
96 }
97 new_rank.insert(node, base + DAMPING * incoming_sum);
98 }
99
100 let diff: f64 = real_nodes
102 .iter()
103 .map(|&node| (new_rank[node] - rank[node]).abs())
104 .sum();
105
106 rank = new_rank;
107
108 if diff < EPSILON {
109 converged = true;
110 break;
111 }
112 }
113
114 let mut nodes: Vec<NodePageRank> = rank
115 .into_iter()
116 .map(|(node, score)| NodePageRank {
117 node: node.to_string(),
118 score,
119 })
120 .collect();
121
122 nodes.sort_by(|a, b| {
124 b.score
125 .partial_cmp(&a.score)
126 .unwrap()
127 .then_with(|| a.node.cmp(&b.node))
128 });
129
130 PageRankResult {
131 iterations,
132 converged,
133 nodes,
134 }
135 }
136}
137
138#[cfg(test)]
139mod tests {
140 use super::*;
141 use crate::analyses::AnalysisContext;
142 use crate::config::Config;
143 use crate::graph::Graph;
144 use crate::graph::test_helpers::{make_edge, make_node};
145 use std::path::Path;
146
147 fn make_ctx<'a>(graph: &'a Graph, config: &'a Config) -> AnalysisContext<'a> {
148 AnalysisContext {
149 graph,
150 root: Path::new("."),
151 config,
152 lockfile: None,
153 }
154 }
155
156 #[test]
157 fn single_node() {
158 let mut graph = Graph::new();
159 graph.add_node(make_node("a.md"));
160
161 let config = Config::defaults();
162 let result = PageRank.run(&make_ctx(&graph, &config));
163 assert!(result.converged);
164 assert_eq!(result.nodes.len(), 1);
165 assert!((result.nodes[0].score - 1.0).abs() < 1e-4);
166 }
167
168 #[test]
169 fn two_nodes_with_link() {
170 let mut graph = Graph::new();
171 graph.add_node(make_node("a.md"));
172 graph.add_node(make_node("b.md"));
173 graph.add_edge(make_edge("a.md", "b.md"));
174
175 let config = Config::defaults();
176 let result = PageRank.run(&make_ctx(&graph, &config));
177 assert!(result.converged);
178 let a = result.nodes.iter().find(|n| n.node == "a.md").unwrap();
179 let b = result.nodes.iter().find(|n| n.node == "b.md").unwrap();
180 assert!(b.score > a.score);
181 }
182
183 #[test]
184 fn scores_sum_to_one() {
185 let mut graph = Graph::new();
186 graph.add_node(make_node("a.md"));
187 graph.add_node(make_node("b.md"));
188 graph.add_node(make_node("c.md"));
189 graph.add_edge(make_edge("a.md", "b.md"));
190 graph.add_edge(make_edge("b.md", "c.md"));
191 graph.add_edge(make_edge("c.md", "a.md"));
192
193 let config = Config::defaults();
194 let result = PageRank.run(&make_ctx(&graph, &config));
195 let sum: f64 = result.nodes.iter().map(|n| n.score).sum();
196 assert!((sum - 1.0).abs() < 1e-4);
197 }
198
199 #[test]
200 fn empty_graph() {
201 let graph = Graph::new();
202 let config = Config::defaults();
203 let result = PageRank.run(&make_ctx(&graph, &config));
204 assert!(result.converged);
205 assert!(result.nodes.is_empty());
206 }
207
208 #[test]
209 fn dangling_node() {
210 let mut graph = Graph::new();
211 graph.add_node(make_node("a.md"));
212 graph.add_node(make_node("b.md"));
213 graph.add_node(make_node("c.md"));
214 graph.add_edge(make_edge("a.md", "b.md"));
215
216 let config = Config::defaults();
217 let result = PageRank.run(&make_ctx(&graph, &config));
218 assert!(result.converged);
219 let sum: f64 = result.nodes.iter().map(|n| n.score).sum();
220 assert!((sum - 1.0).abs() < 1e-4);
221 }
222}