simple_pagerank/
lib.rs

1//! # Simple Pagerank
2//!
3//! Pretty simple generic implementation of the PageRank graph sorting algorithm.
4#![deny(missing_docs)]
5#![allow(warnings)]
6use std::collections::HashMap;
7use std::default::Default;
8use std::hash::Hash;
9
10#[derive(Clone)]
11struct Node<T>
12where
13    T: Eq + Hash + Clone,
14{
15    /// Edge type
16    node: T,
17    /// List of edges (the ids which are edges in `nodes`)
18    in_edges: Vec<usize>,
19    /// Number of out edges
20    out_edges: usize,
21    score: f64,
22}
23
24/// PageRank structure.
25///
26pub struct Pagerank<T>
27where
28    T: Eq + Hash + Clone,
29{
30    /// Damping factor
31    ///
32    /// The PageRank theory holds that an imaginary surfer who is randomly clicking on edges will
33    /// eventually stop clicking. The probability, at any step, that the person will continue is a
34    /// damping factor d. Various studies have tested different damping factors, but it is generally
35    /// assumed that the damping factor will be set around 0.85.
36    damping: f64,
37    /// List of nodes. Each node is uniquely identified by their type T.
38    nodes: Vec<Node<T>>,
39    /// Total number of elements
40    edges: usize,
41    /// Keeps track of nodes and their position in the nodes vector.
42    node_positions: HashMap<T, usize>,
43    /// Cache to keep the count of total nodes with incoming edges. This cache gets reset everytime
44    /// a new node is being added to the graph.
45    nodes_with_in_edges: Option<usize>,
46}
47
48impl<T> Pagerank<T>
49where
50    T: Eq + Hash + Clone,
51{
52    /// Creates a new instance
53    pub fn new() -> Pagerank<T> {
54        Pagerank::<T> {
55            damping: 0.85,
56            nodes: Vec::new(),
57            edges: 0,
58            node_positions: HashMap::<T, usize>::new(),
59            nodes_with_in_edges: None,
60        }
61    }
62
63    /// Sets the dumping factor. A value between 0 and 100 is expected.
64    pub fn set_damping_factor(
65        &mut self,
66        factor: u8,
67    ) -> Result<(), String> {
68        if factor >= 100 {
69            return Err("{val} needs to be bellow 100".to_string());
70        }
71
72        self.damping = factor as f64 / 100_f64;
73        Ok(())
74    }
75
76    /// Adds an node between two nodes
77    pub fn add_edge(&mut self, source: T, target: T) {
78        let source = self.get_or_create_node(source);
79        let target = self.get_or_create_node(target);
80        self.nodes[source].out_edges += 1;
81        self.nodes[target].in_edges.push(source);
82        self.edges += 1;
83    }
84
85    /// Returns the current score of a gien node
86    pub fn get_score(&self, node: T) -> Option<f64> {
87        self.node_positions
88            .get(&node)
89            .map(|id| self.nodes[*id].score)
90    }
91
92    /// Returns the number of in edges for the given node
93    pub fn get_in_edges(&self, node: T) -> Option<usize> {
94        self.node_positions
95            .get(&node)
96            .map(|id| self.nodes[*id].in_edges.len())
97    }
98
99    /// Returns the number of out edges for the given node
100    pub fn get_out_edges(&self, node: T) -> Option<usize> {
101        self.node_positions
102            .get(&node)
103            .map(|id| self.nodes[*id].out_edges)
104    }
105
106    /// Returns the node_id for a given node name
107    pub fn get_or_create_node(&mut self, node: T) -> usize {
108        match self.node_positions.get(&node) {
109            Some(&value) => value,
110            _ => {
111                let id = self.nodes.len();
112                self.nodes.push(Node::<T> {
113                    node: node.clone(),
114                    in_edges: Vec::new(),
115                    out_edges: 0,
116                    score: 1f64 - self.damping,
117                });
118                self.node_positions.insert(node, id);
119                self.nodes_with_in_edges = None;
120                id
121            }
122        }
123    }
124
125    /// Calculates PageRank with custom convergence
126    pub fn calculate_with_convergence(
127        &mut self,
128        convergence: f64,
129    ) -> i32 {
130        let mut iterations = 0;
131
132        loop {
133            if self.calculate_step() < convergence {
134                break;
135            }
136            iterations += 1;
137        }
138
139        iterations
140    }
141
142    /// Calculates pagerank with custom convergence
143    pub fn calculate(&mut self) -> i32 {
144        self.calculate_with_convergence(0.01)
145    }
146
147    /// Return all nodes, sorted by their pagerank
148    pub fn nodes(&self) -> Vec<(&T, f64)> {
149        let mut nodes = self
150            .nodes
151            .iter()
152            .map(|node| (&node.node, node.score))
153            .collect::<Vec<(&T, f64)>>();
154
155        nodes.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
156
157        nodes
158    }
159
160    /// Calculates a single iteration of the PageRank
161    pub fn calculate_step(&mut self) -> f64 {
162        let mut current_iteration = self.nodes.clone();
163
164        let nodes = &self.nodes;
165
166        self.nodes
167            .iter()
168            .enumerate()
169            .map(|(id, n)| {
170                let score = n
171                    .in_edges
172                    .iter()
173                    .map(|node| {
174                        nodes[*node].score
175                            / nodes[*node].out_edges as f64
176                    })
177                    .sum::<f64>();
178
179                current_iteration[id].score =
180                    (1f64 - self.damping) + (self.damping * score);
181            })
182            .for_each(drop);
183
184        let convergence: f64 = self
185            .nodes
186            .iter()
187            .enumerate()
188            .map(|(id, n)| {
189                let diff = n.score - current_iteration[id].score;
190                diff * diff
191            })
192            .sum();
193
194        self.nodes = current_iteration;
195
196        convergence.sqrt() / self.len_nodes_with_in_edges() as f64
197    }
198
199    /// Len of all edges
200    pub fn len_nodes_with_in_edges(&mut self) -> usize {
201        if let Some(n) = self.nodes_with_in_edges {
202            return n;
203        }
204
205        let mut total = 0;
206
207        for node in self.nodes.iter() {
208            if node.in_edges.len() > 0 {
209                total += 1;
210            }
211        }
212
213        self.nodes_with_in_edges = Some(total);
214
215        total
216    }
217
218    /// Return the number of vertices/nodes in the current graph
219    pub fn len(&self) -> usize {
220        self.nodes.len()
221    }
222
223    /// Returns the number of edges in the current graph
224    pub fn len_node(&self) -> usize {
225        self.edges
226    }
227
228    /// If the graph is empty
229    pub fn is_empty(&self) -> bool {
230        self.nodes.is_empty()
231    }
232}
233
234impl<T> Default for Pagerank<T>
235where
236    T: Eq + Hash + Clone,
237{
238    fn default() -> Self {
239        Self::new()
240    }
241}
242
243#[cfg(test)]
244mod tests {
245    use crate::Pagerank;
246
247    #[test]
248    fn test_two_nodes_are_created() {
249        let mut pr = Pagerank::<&str>::new();
250        pr.add_edge("foo", "bar");
251        assert_eq!(2, pr.len())
252    }
253
254    #[test]
255    fn test_edges() {
256        let mut pr = Pagerank::<&str>::new();
257        pr.add_edge("foo", "bar");
258        assert_eq!(0, pr.get_or_create_node("foo"));
259        assert_eq!(1, pr.get_or_create_node("bar"));
260
261        assert_eq!(Some(0), pr.get_in_edges("foo"));
262        assert_eq!(Some(1), pr.get_out_edges("foo"));
263        assert_eq!(Some(1), pr.get_in_edges("bar"));
264        assert_eq!(Some(0), pr.get_out_edges("bar"));
265    }
266
267    #[test]
268    fn test_default_score() {
269        let mut pr = Pagerank::<&str>::new();
270        pr.add_edge("foo", "bar");
271        pr.add_edge("bar", "foo");
272        pr.add_edge("xxx", "bar");
273        pr.add_edge("yyy", "xxx");
274
275        assert_eq!(
276            15_i64,
277            (pr.get_score("foo").expect("float") * 100_f64) as i64
278        );
279        assert_eq!(pr.get_score("foo"), pr.get_score("bar"));
280        assert_eq!(pr.get_score("foo"), pr.get_score("xxx"));
281        assert_eq!(pr.get_score("foo"), pr.get_score("yyy"));
282    }
283
284    #[test]
285    fn test_iteration() {
286        let mut pr = Pagerank::<&str>::new();
287        pr.add_edge("foo", "bar");
288        pr.add_edge("bar", "foo");
289        pr.add_edge("xxx", "bar");
290        pr.add_edge("yyy", "xxx");
291
292        pr.calculate_step();
293
294        assert_eq!(
295            vec!["bar", "foo", "xxx", "yyy"],
296            pr.nodes()
297                .iter()
298                .map(|(node, _)| **node)
299                .collect::<Vec<&str>>()
300        );
301    }
302
303    #[test]
304    fn test_iterations() {
305        let mut pr = Pagerank::<&str>::new();
306        pr.add_edge("foo", "bar");
307        pr.add_edge("bar", "foo");
308        pr.add_edge("xxx", "bar");
309        pr.add_edge("yyy", "xxx");
310
311        assert_eq!(true, pr.calculate_step() > pr.calculate_step());
312        pr.calculate_step();
313
314        assert_eq!(
315            vec!["bar", "foo", "xxx", "yyy"],
316            pr.nodes()
317                .iter()
318                .map(|(node, _)| **node)
319                .collect::<Vec<&str>>()
320        );
321    }
322
323    #[test]
324    fn test_full_run() {
325        let mut pr = Pagerank::<&str>::new();
326        pr.add_edge("foo", "bar");
327        pr.add_edge("bar", "foo");
328        pr.add_edge("xxx", "bar");
329        pr.add_edge("yyy", "xxx");
330
331        assert_eq!(16, pr.calculate());
332
333        assert_eq!(
334            vec!["bar", "foo", "xxx", "yyy"],
335            pr.nodes()
336                .iter()
337                .map(|(node, _)| **node)
338                .collect::<Vec<&str>>()
339        );
340    }
341
342    #[test]
343    /// https://en.wikipedia.org/wiki/PageRank#/media/File:PageRanks-Example.svg
344    fn test_pagerank_example() {
345        let mut pr = Pagerank::new();
346        let edges = vec![
347            ("D", "A"),
348            ("D", "B"),
349            ("B", "C"),
350            ("C", "B"),
351            ("E", "B"),
352            ("E", "F"),
353            ("F", "B"),
354            ("F", "E"),
355            ("G", "B"),
356            ("G", "E"),
357            ("H", "B"),
358            ("H", "E"),
359            ("I", "B"),
360            ("I", "E"),
361            ("J", "E"),
362            ("K", "E"),
363        ];
364
365        edges
366            .iter()
367            .map(|(l1, l2)| pr.add_edge(*l1, *l2))
368            .for_each(drop);
369
370        pr.calculate();
371
372        assert_eq!(
373            vec![
374                "B", "C", "E", "F", "A", "D", "G", "H", "I", "J", "K"
375            ],
376            pr.nodes()
377                .iter()
378                .map(|(node, _)| **node)
379                .collect::<Vec<&str>>()
380        );
381    }
382}