1#![deny(missing_docs)]
5#![allow(warnings)]
6use std::collections::HashMap;
7use std::default::Default;
8use std::hash::Hash;
9
10#[derive(Clone)]
11struct Node<T>
12where
13 T: Eq + Hash + Clone,
14{
15 node: T,
17 in_edges: Vec<usize>,
19 out_edges: usize,
21 score: f64,
22}
23
24pub struct Pagerank<T>
27where
28 T: Eq + Hash + Clone,
29{
30 damping: f64,
37 nodes: Vec<Node<T>>,
39 edges: usize,
41 node_positions: HashMap<T, usize>,
43 nodes_with_in_edges: Option<usize>,
46}
47
48impl<T> Pagerank<T>
49where
50 T: Eq + Hash + Clone,
51{
52 pub fn new() -> Pagerank<T> {
54 Pagerank::<T> {
55 damping: 0.85,
56 nodes: Vec::new(),
57 edges: 0,
58 node_positions: HashMap::<T, usize>::new(),
59 nodes_with_in_edges: None,
60 }
61 }
62
63 pub fn set_damping_factor(
65 &mut self,
66 factor: u8,
67 ) -> Result<(), String> {
68 if factor >= 100 {
69 return Err("{val} needs to be bellow 100".to_string());
70 }
71
72 self.damping = factor as f64 / 100_f64;
73 Ok(())
74 }
75
76 pub fn add_edge(&mut self, source: T, target: T) {
78 let source = self.get_or_create_node(source);
79 let target = self.get_or_create_node(target);
80 self.nodes[source].out_edges += 1;
81 self.nodes[target].in_edges.push(source);
82 self.edges += 1;
83 }
84
85 pub fn get_score(&self, node: T) -> Option<f64> {
87 self.node_positions
88 .get(&node)
89 .map(|id| self.nodes[*id].score)
90 }
91
92 pub fn get_in_edges(&self, node: T) -> Option<usize> {
94 self.node_positions
95 .get(&node)
96 .map(|id| self.nodes[*id].in_edges.len())
97 }
98
99 pub fn get_out_edges(&self, node: T) -> Option<usize> {
101 self.node_positions
102 .get(&node)
103 .map(|id| self.nodes[*id].out_edges)
104 }
105
106 pub fn get_or_create_node(&mut self, node: T) -> usize {
108 match self.node_positions.get(&node) {
109 Some(&value) => value,
110 _ => {
111 let id = self.nodes.len();
112 self.nodes.push(Node::<T> {
113 node: node.clone(),
114 in_edges: Vec::new(),
115 out_edges: 0,
116 score: 1f64 - self.damping,
117 });
118 self.node_positions.insert(node, id);
119 self.nodes_with_in_edges = None;
120 id
121 }
122 }
123 }
124
125 pub fn calculate_with_convergence(
127 &mut self,
128 convergence: f64,
129 ) -> i32 {
130 let mut iterations = 0;
131
132 loop {
133 if self.calculate_step() < convergence {
134 break;
135 }
136 iterations += 1;
137 }
138
139 iterations
140 }
141
142 pub fn calculate(&mut self) -> i32 {
144 self.calculate_with_convergence(0.01)
145 }
146
147 pub fn nodes(&self) -> Vec<(&T, f64)> {
149 let mut nodes = self
150 .nodes
151 .iter()
152 .map(|node| (&node.node, node.score))
153 .collect::<Vec<(&T, f64)>>();
154
155 nodes.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
156
157 nodes
158 }
159
160 pub fn calculate_step(&mut self) -> f64 {
162 let mut current_iteration = self.nodes.clone();
163
164 let nodes = &self.nodes;
165
166 self.nodes
167 .iter()
168 .enumerate()
169 .map(|(id, n)| {
170 let score = n
171 .in_edges
172 .iter()
173 .map(|node| {
174 nodes[*node].score
175 / nodes[*node].out_edges as f64
176 })
177 .sum::<f64>();
178
179 current_iteration[id].score =
180 (1f64 - self.damping) + (self.damping * score);
181 })
182 .for_each(drop);
183
184 let convergence: f64 = self
185 .nodes
186 .iter()
187 .enumerate()
188 .map(|(id, n)| {
189 let diff = n.score - current_iteration[id].score;
190 diff * diff
191 })
192 .sum();
193
194 self.nodes = current_iteration;
195
196 convergence.sqrt() / self.len_nodes_with_in_edges() as f64
197 }
198
199 pub fn len_nodes_with_in_edges(&mut self) -> usize {
201 if let Some(n) = self.nodes_with_in_edges {
202 return n;
203 }
204
205 let mut total = 0;
206
207 for node in self.nodes.iter() {
208 if node.in_edges.len() > 0 {
209 total += 1;
210 }
211 }
212
213 self.nodes_with_in_edges = Some(total);
214
215 total
216 }
217
218 pub fn len(&self) -> usize {
220 self.nodes.len()
221 }
222
223 pub fn len_node(&self) -> usize {
225 self.edges
226 }
227
228 pub fn is_empty(&self) -> bool {
230 self.nodes.is_empty()
231 }
232}
233
234impl<T> Default for Pagerank<T>
235where
236 T: Eq + Hash + Clone,
237{
238 fn default() -> Self {
239 Self::new()
240 }
241}
242
243#[cfg(test)]
244mod tests {
245 use crate::Pagerank;
246
247 #[test]
248 fn test_two_nodes_are_created() {
249 let mut pr = Pagerank::<&str>::new();
250 pr.add_edge("foo", "bar");
251 assert_eq!(2, pr.len())
252 }
253
254 #[test]
255 fn test_edges() {
256 let mut pr = Pagerank::<&str>::new();
257 pr.add_edge("foo", "bar");
258 assert_eq!(0, pr.get_or_create_node("foo"));
259 assert_eq!(1, pr.get_or_create_node("bar"));
260
261 assert_eq!(Some(0), pr.get_in_edges("foo"));
262 assert_eq!(Some(1), pr.get_out_edges("foo"));
263 assert_eq!(Some(1), pr.get_in_edges("bar"));
264 assert_eq!(Some(0), pr.get_out_edges("bar"));
265 }
266
267 #[test]
268 fn test_default_score() {
269 let mut pr = Pagerank::<&str>::new();
270 pr.add_edge("foo", "bar");
271 pr.add_edge("bar", "foo");
272 pr.add_edge("xxx", "bar");
273 pr.add_edge("yyy", "xxx");
274
275 assert_eq!(
276 15_i64,
277 (pr.get_score("foo").expect("float") * 100_f64) as i64
278 );
279 assert_eq!(pr.get_score("foo"), pr.get_score("bar"));
280 assert_eq!(pr.get_score("foo"), pr.get_score("xxx"));
281 assert_eq!(pr.get_score("foo"), pr.get_score("yyy"));
282 }
283
284 #[test]
285 fn test_iteration() {
286 let mut pr = Pagerank::<&str>::new();
287 pr.add_edge("foo", "bar");
288 pr.add_edge("bar", "foo");
289 pr.add_edge("xxx", "bar");
290 pr.add_edge("yyy", "xxx");
291
292 pr.calculate_step();
293
294 assert_eq!(
295 vec!["bar", "foo", "xxx", "yyy"],
296 pr.nodes()
297 .iter()
298 .map(|(node, _)| **node)
299 .collect::<Vec<&str>>()
300 );
301 }
302
303 #[test]
304 fn test_iterations() {
305 let mut pr = Pagerank::<&str>::new();
306 pr.add_edge("foo", "bar");
307 pr.add_edge("bar", "foo");
308 pr.add_edge("xxx", "bar");
309 pr.add_edge("yyy", "xxx");
310
311 assert_eq!(true, pr.calculate_step() > pr.calculate_step());
312 pr.calculate_step();
313
314 assert_eq!(
315 vec!["bar", "foo", "xxx", "yyy"],
316 pr.nodes()
317 .iter()
318 .map(|(node, _)| **node)
319 .collect::<Vec<&str>>()
320 );
321 }
322
323 #[test]
324 fn test_full_run() {
325 let mut pr = Pagerank::<&str>::new();
326 pr.add_edge("foo", "bar");
327 pr.add_edge("bar", "foo");
328 pr.add_edge("xxx", "bar");
329 pr.add_edge("yyy", "xxx");
330
331 assert_eq!(16, pr.calculate());
332
333 assert_eq!(
334 vec!["bar", "foo", "xxx", "yyy"],
335 pr.nodes()
336 .iter()
337 .map(|(node, _)| **node)
338 .collect::<Vec<&str>>()
339 );
340 }
341
342 #[test]
343 fn test_pagerank_example() {
345 let mut pr = Pagerank::new();
346 let edges = vec![
347 ("D", "A"),
348 ("D", "B"),
349 ("B", "C"),
350 ("C", "B"),
351 ("E", "B"),
352 ("E", "F"),
353 ("F", "B"),
354 ("F", "E"),
355 ("G", "B"),
356 ("G", "E"),
357 ("H", "B"),
358 ("H", "E"),
359 ("I", "B"),
360 ("I", "E"),
361 ("J", "E"),
362 ("K", "E"),
363 ];
364
365 edges
366 .iter()
367 .map(|(l1, l2)| pr.add_edge(*l1, *l2))
368 .for_each(drop);
369
370 pr.calculate();
371
372 assert_eq!(
373 vec![
374 "B", "C", "E", "F", "A", "D", "G", "H", "I", "J", "K"
375 ],
376 pr.nodes()
377 .iter()
378 .map(|(node, _)| **node)
379 .collect::<Vec<&str>>()
380 );
381 }
382}