// graphmind_graph_algorithms/pagerank.rs

//! PageRank algorithm implementation
//!
//! Implements REQ-ALGO-001: Node centrality

5use super::common::{GraphView, NodeId};
6use std::collections::HashMap;
7
/// PageRank configuration
pub struct PageRankConfig {
    /// Damping factor `d` (usually 0.85): the probability of following an
    /// outgoing edge rather than teleporting to a random node.
    pub damping_factor: f64,
    /// Maximum number of iterations to run.
    pub iterations: usize,
    /// Tolerance for convergence, compared against the L1 change between
    /// successive score vectors (0.0 = run all iterations).
    pub tolerance: f64,
    /// Whether to redistribute dangling node mass.
    /// Set to false for LDBC Graphalytics compatibility (reference outputs
    /// are generated without dangling redistribution).
    pub dangling_redistribution: bool,
}
21
22impl Default for PageRankConfig {
23    fn default() -> Self {
24        Self {
25            damping_factor: 0.85,
26            iterations: 20,
27            tolerance: 0.0001,
28            dangling_redistribution: true,
29        }
30    }
31}
32
33/// Calculate PageRank for the graph view
34pub fn page_rank(view: &GraphView, config: PageRankConfig) -> HashMap<NodeId, f64> {
35    let n = view.node_count;
36
37    if n == 0 {
38        return HashMap::new();
39    }
40
41    // 2. Initialize scores
42    // LDBC Graphalytics spec: initial score is 1/N
43    let initial_score = 1.0 / n as f64;
44    let mut scores = vec![initial_score; n];
45    let mut next_scores = vec![0.0; n];
46
47    // 3. Iteration
48    // LDBC Graphalytics spec: PR(v) = (1-d)/N + d * sum(PR(u)/out_degree(u))
49    let d = config.damping_factor;
50    let base_score = (1.0 - d) / n as f64;
51
52    for _ in 0..config.iterations {
53        let mut total_diff = 0.0;
54
55        // Compute dangling node mass if enabled
56        let dangling_contrib = if config.dangling_redistribution {
57            let dangling_sum: f64 = (0..n)
58                .filter(|&i| view.out_degree(i) == 0)
59                .map(|i| scores[i])
60                .sum();
61            dangling_sum / n as f64
62        } else {
63            0.0
64        };
65
66        for i in 0..n {
67            let mut sum_incoming = 0.0;
68
69            // Iterate over incoming edges
70            for &source_idx in view.predecessors(i) {
71                let out_degree = view.out_degree(source_idx);
72                if out_degree > 0 {
73                    sum_incoming += scores[source_idx] / out_degree as f64;
74                }
75            }
76
77            next_scores[i] = base_score + d * (sum_incoming + dangling_contrib);
78            total_diff += (next_scores[i] - scores[i]).abs();
79        }
80
81        // Swap buffers
82        scores.copy_from_slice(&next_scores);
83
84        // Check convergence
85        if total_diff < config.tolerance {
86            break;
87        }
88    }
89
90    // 4. Map back to NodeIds
91    let mut result = HashMap::with_capacity(n);
92    for (idx, score) in scores.into_iter().enumerate() {
93        result.insert(view.index_to_node[idx], score);
94    }
95
96    result
97}
98
#[cfg(test)]
mod tests {
    use super::*;
    use crate::common::GraphView;

    // Triangle: 1->2, 2->3, 3->1 — a symmetric directed cycle with no
    // dangling nodes, so all three nodes should converge to equal rank (1/3).
    fn build_triangle_graph() -> GraphView {
        // Triangle: 1->2, 2->3, 3->1
        let node_count = 3;
        let index_to_node = vec![1, 2, 3];
        let mut node_to_index = HashMap::new();
        for (i, &id) in index_to_node.iter().enumerate() {
            node_to_index.insert(id, i);
        }
        let outgoing = vec![vec![1], vec![2], vec![0]]; // 1->2, 2->3, 3->1
        let incoming = vec![vec![2], vec![0], vec![1]]; // 1<-3, 2<-1, 3<-2
        GraphView::from_adjacency_list(
            node_count,
            index_to_node,
            node_to_index,
            outgoing,
            incoming,
            None,
        )
    }

    // Star: hub node 1 points to leaves 2, 3, 4; the leaves have no outgoing
    // edges, i.e. they are dangling nodes.
    fn build_star_graph() -> GraphView {
        // Star: 1->2, 1->3, 1->4 (node 1 is hub)
        let node_count = 4;
        let index_to_node = vec![1, 2, 3, 4];
        let mut node_to_index = HashMap::new();
        for (i, &id) in index_to_node.iter().enumerate() {
            node_to_index.insert(id, i);
        }
        let outgoing = vec![vec![1, 2, 3], vec![], vec![], vec![]];
        let incoming = vec![vec![], vec![0], vec![0], vec![0]];
        GraphView::from_adjacency_list(
            node_count,
            index_to_node,
            node_to_index,
            outgoing,
            incoming,
            None,
        )
    }

    #[test]
    fn test_pagerank_empty_graph() {
        let view = GraphView::from_adjacency_list(0, vec![], HashMap::new(), vec![], vec![], None);
        let result = page_rank(&view, PageRankConfig::default());
        assert!(result.is_empty());
    }

    #[test]
    fn test_pagerank_single_node() {
        let mut node_to_index = HashMap::new();
        node_to_index.insert(1u64, 0);
        let view = GraphView::from_adjacency_list(
            1,
            vec![1],
            node_to_index,
            vec![vec![]],
            vec![vec![]],
            None,
        );
        let result = page_rank(
            &view,
            PageRankConfig {
                damping_factor: 0.85,
                iterations: 20,
                tolerance: 0.0001,
                dangling_redistribution: true,
            },
        );
        assert_eq!(result.len(), 1);
        // Single node with dangling redistribution: score should be ~1.0
        let score = result[&1];
        assert!(
            (score - 1.0).abs() < 0.01,
            "Single node score should be ~1.0, got {}",
            score
        );
    }

    #[test]
    fn test_pagerank_triangle_symmetric() {
        let view = build_triangle_graph();
        let result = page_rank(
            &view,
            PageRankConfig {
                damping_factor: 0.85,
                iterations: 100,
                tolerance: 1e-10,
                dangling_redistribution: true,
            },
        );

        assert_eq!(result.len(), 3);
        // In a symmetric triangle, all nodes should have equal PageRank
        let scores: Vec<f64> = vec![result[&1], result[&2], result[&3]];
        let avg = scores.iter().sum::<f64>() / 3.0;
        for s in &scores {
            assert!(
                (s - avg).abs() < 0.001,
                "Triangle nodes should have equal rank, got {:?}",
                scores
            );
        }
    }

    #[test]
    fn test_pagerank_star_hub_has_lowest_rank() {
        // In a star where hub points outward, the hub sends rank but receives none
        let view = build_star_graph();
        let result = page_rank(
            &view,
            PageRankConfig {
                damping_factor: 0.85,
                iterations: 50,
                tolerance: 1e-10,
                dangling_redistribution: false,
            },
        );

        assert_eq!(result.len(), 4);
        // Without dangling redistribution, hub (node 1) that only sends should have lowest rank
        // Leaf nodes (2,3,4) receive from hub but are dangling
        let hub_score = result[&1];
        let leaf_score = result[&2];
        // Hub sends all its rank out, leaf receives rank from hub
        assert!(
            leaf_score > hub_score,
            "Leaf ({}) should have higher rank than hub ({}) without dangling redistribution",
            leaf_score,
            hub_score
        );
    }

    #[test]
    fn test_pagerank_scores_sum_to_one() {
        let view = build_triangle_graph();
        let result = page_rank(
            &view,
            PageRankConfig {
                damping_factor: 0.85,
                iterations: 100,
                tolerance: 1e-10,
                dangling_redistribution: true,
            },
        );

        let total: f64 = result.values().sum();
        assert!(
            (total - 1.0).abs() < 0.01,
            "PageRank scores should sum to ~1.0 with dangling redistribution, got {}",
            total
        );
    }

    #[test]
    fn test_pagerank_convergence() {
        let view = build_triangle_graph();
        // tolerance = 0.0 disables the early-exit check, so these two runs
        // execute exactly 1 and exactly 100 iterations respectively.
        let result_1 = page_rank(
            &view,
            PageRankConfig {
                damping_factor: 0.85,
                iterations: 1,
                tolerance: 0.0,
                dangling_redistribution: true,
            },
        );
        let result_100 = page_rank(
            &view,
            PageRankConfig {
                damping_factor: 0.85,
                iterations: 100,
                tolerance: 0.0,
                dangling_redistribution: true,
            },
        );

        // More iterations should give more accurate result
        // In a symmetric triangle, converged score is 1/3
        let target = 1.0 / 3.0;
        let diff_1 = (result_1[&1] - target).abs();
        let diff_100 = (result_100[&1] - target).abs();
        assert!(
            diff_100 <= diff_1,
            "100 iterations should be closer to target than 1: diff_1={}, diff_100={}",
            diff_1,
            diff_100
        );
    }

    #[test]
    fn test_pagerank_dangling_redistribution_flag() {
        let view = build_star_graph(); // Nodes 2,3,4 are dangling
        let with_dangling = page_rank(
            &view,
            PageRankConfig {
                damping_factor: 0.85,
                iterations: 50,
                tolerance: 1e-10,
                dangling_redistribution: true,
            },
        );
        let without_dangling = page_rank(
            &view,
            PageRankConfig {
                damping_factor: 0.85,
                iterations: 50,
                tolerance: 1e-10,
                dangling_redistribution: false,
            },
        );

        // With dangling redistribution, scores should sum to ~1.0
        let total_with: f64 = with_dangling.values().sum();
        assert!(
            (total_with - 1.0).abs() < 0.01,
            "With dangling redistribution, sum should be ~1.0, got {}",
            total_with
        );

        // Without redistribution, total will be < 1.0 (rank leaks through dangling nodes)
        let total_without: f64 = without_dangling.values().sum();
        assert!(
            total_without < 0.9,
            "Without dangling redistribution, rank should leak, got sum={}",
            total_without
        );
    }

    #[test]
    fn test_pagerank_damping_factor_effect() {
        let view = build_triangle_graph();
        let low_damping = page_rank(
            &view,
            PageRankConfig {
                damping_factor: 0.5,
                iterations: 100,
                tolerance: 1e-10,
                dangling_redistribution: true,
            },
        );
        let high_damping = page_rank(
            &view,
            PageRankConfig {
                damping_factor: 0.99,
                iterations: 100,
                tolerance: 1e-10,
                dangling_redistribution: true,
            },
        );

        // Both should produce valid scores summing to 1
        let total_low: f64 = low_damping.values().sum();
        let total_high: f64 = high_damping.values().sum();
        assert!((total_low - 1.0).abs() < 0.01);
        assert!((total_high - 1.0).abs() < 0.01);
    }
}