drft/analyses/
pagerank.rs1use super::{Analysis, AnalysisContext};
2use std::collections::HashMap;
3
4#[derive(Debug, Clone, serde::Serialize)]
5pub struct NodePageRank {
6 pub node: String,
7 pub score: f64,
8}
9
10#[derive(Debug, Clone, serde::Serialize)]
11pub struct PageRankResult {
12 pub iterations: usize,
13 pub converged: bool,
14 pub nodes: Vec<NodePageRank>,
15}
16
17pub struct PageRank;
18
19const DAMPING: f64 = 0.85;
20const MAX_ITERATIONS: usize = 100;
21const EPSILON: f64 = 1e-6;
22
23impl Analysis for PageRank {
24 type Output = PageRankResult;
25
26 fn name(&self) -> &str {
27 "pagerank"
28 }
29
30 fn run(&self, ctx: &AnalysisContext) -> PageRankResult {
31 let graph = ctx.graph;
32 let real_nodes: Vec<&str> = graph.included_nodes().map(|(s, _)| s.as_str()).collect();
33
34 let n = real_nodes.len();
35 if n == 0 {
36 return PageRankResult {
37 iterations: 0,
38 converged: true,
39 nodes: Vec::new(),
40 };
41 }
42
43 let mut out_degree: HashMap<&str, usize> = HashMap::new();
45 let mut inbound: HashMap<&str, Vec<&str>> = HashMap::new();
46
47 for &node in &real_nodes {
48 out_degree.insert(node, 0);
49 inbound.insert(node, Vec::new());
50 }
51
52 for edge in &graph.edges {
53 if graph.is_internal_edge(edge) && edge.source != edge.target {
54 *out_degree.get_mut(edge.source.as_str()).unwrap() += 1;
55 inbound
56 .get_mut(edge.target.as_str())
57 .unwrap()
58 .push(edge.source.as_str());
59 }
60 }
61
62 let init = 1.0 / n as f64;
63 let mut rank: HashMap<&str, f64> = HashMap::new();
64 for &node in &real_nodes {
65 rank.insert(node, init);
66 }
67
68 let dangling: Vec<&str> = real_nodes
70 .iter()
71 .filter(|&&node| out_degree[node] == 0)
72 .copied()
73 .collect();
74
75 let mut iterations = 0;
76 let mut converged = false;
77
78 for _ in 0..MAX_ITERATIONS {
79 iterations += 1;
80
81 let dangling_sum: f64 = dangling.iter().map(|&node| rank[node]).sum();
83
84 let mut new_rank: HashMap<&str, f64> = HashMap::new();
85 let base = (1.0 - DAMPING) / n as f64 + DAMPING * dangling_sum / n as f64;
86
87 for &node in &real_nodes {
88 let mut incoming_sum = 0.0;
89 for &pred in &inbound[node] {
90 incoming_sum += rank[pred] / out_degree[pred] as f64;
91 }
92 new_rank.insert(node, base + DAMPING * incoming_sum);
93 }
94
95 let diff: f64 = real_nodes
97 .iter()
98 .map(|&node| (new_rank[node] - rank[node]).abs())
99 .sum();
100
101 rank = new_rank;
102
103 if diff < EPSILON {
104 converged = true;
105 break;
106 }
107 }
108
109 let mut nodes: Vec<NodePageRank> = rank
110 .into_iter()
111 .map(|(node, score)| NodePageRank {
112 node: node.to_string(),
113 score,
114 })
115 .collect();
116
117 nodes.sort_by(|a, b| {
119 b.score
120 .partial_cmp(&a.score)
121 .unwrap()
122 .then_with(|| a.node.cmp(&b.node))
123 });
124
125 PageRankResult {
126 iterations,
127 converged,
128 nodes,
129 }
130 }
131}
132
133#[cfg(test)]
134mod tests {
135 use super::*;
136 use crate::analyses::AnalysisContext;
137 use crate::config::Config;
138 use crate::graph::Graph;
139 use crate::graph::test_helpers::{make_edge, make_node};
140 use std::path::Path;
141
142 fn make_ctx<'a>(graph: &'a Graph, config: &'a Config) -> AnalysisContext<'a> {
143 AnalysisContext {
144 graph,
145 root: Path::new("."),
146 config,
147 lockfile: None,
148 }
149 }
150
151 #[test]
152 fn single_node() {
153 let mut graph = Graph::new();
154 graph.add_node(make_node("a.md"));
155
156 let config = Config::defaults();
157 let result = PageRank.run(&make_ctx(&graph, &config));
158 assert!(result.converged);
159 assert_eq!(result.nodes.len(), 1);
160 assert!((result.nodes[0].score - 1.0).abs() < 1e-4);
161 }
162
163 #[test]
164 fn two_nodes_with_link() {
165 let mut graph = Graph::new();
166 graph.add_node(make_node("a.md"));
167 graph.add_node(make_node("b.md"));
168 graph.add_edge(make_edge("a.md", "b.md"));
169
170 let config = Config::defaults();
171 let result = PageRank.run(&make_ctx(&graph, &config));
172 assert!(result.converged);
173 let a = result.nodes.iter().find(|n| n.node == "a.md").unwrap();
174 let b = result.nodes.iter().find(|n| n.node == "b.md").unwrap();
175 assert!(b.score > a.score);
176 }
177
178 #[test]
179 fn scores_sum_to_one() {
180 let mut graph = Graph::new();
181 graph.add_node(make_node("a.md"));
182 graph.add_node(make_node("b.md"));
183 graph.add_node(make_node("c.md"));
184 graph.add_edge(make_edge("a.md", "b.md"));
185 graph.add_edge(make_edge("b.md", "c.md"));
186 graph.add_edge(make_edge("c.md", "a.md"));
187
188 let config = Config::defaults();
189 let result = PageRank.run(&make_ctx(&graph, &config));
190 let sum: f64 = result.nodes.iter().map(|n| n.score).sum();
191 assert!((sum - 1.0).abs() < 1e-4);
192 }
193
194 #[test]
195 fn empty_graph() {
196 let graph = Graph::new();
197 let config = Config::defaults();
198 let result = PageRank.run(&make_ctx(&graph, &config));
199 assert!(result.converged);
200 assert!(result.nodes.is_empty());
201 }
202
203 #[test]
204 fn dangling_node() {
205 let mut graph = Graph::new();
206 graph.add_node(make_node("a.md"));
207 graph.add_node(make_node("b.md"));
208 graph.add_node(make_node("c.md"));
209 graph.add_edge(make_edge("a.md", "b.md"));
210
211 let config = Config::defaults();
212 let result = PageRank.run(&make_ctx(&graph, &config));
213 assert!(result.converged);
214 let sum: f64 = result.nodes.iter().map(|n| n.score).sum();
215 assert!((sum - 1.0).abs() < 1e-4);
216 }
217}