unobtanium_graph_algorithms/graph/
pagerank.rs

1// SPDX-FileCopyrightText: 2025 Slatian 2025
2//
3// SPDX-License-Identifier: LGPL-3.0-only
4
5use crate::PageRankConfiguration;
6use crate::graph::Graph;
7use crate::graph::ranked_nodes::RankedNodes;
8use crate::node::NodeId;
9
10use core::hash::Hash;
11
12// Based on the textrank algorithm of the keyword_extraction crate:
13// https://github.com/tugascript/keyword-extraction-rs/blob/master/src/text_rank/text_rank_logic.rs
14
15/// PageRank and TextRank related graph functionality
16impl<T: Eq + Hash + Clone> Graph<T> {
17	/// Add tokens to graph, use a window in which all tokens will be connected with each other using bidirectional connections. Use this if you want to use textrank for keyword extraction.
18	///
19	/// Since textrank is just pagerank you can use the `run_pagerank()` function to get the results.
20	pub fn populate_for_textrank(&mut self, tokens: &[T], window_size: usize) {
21		let edges = tokens.iter().enumerate().flat_map(|(i, from)| {
22			tokens[i + 1..]
23				.iter()
24				.take(window_size)
25				.filter(move |to| &from != to)
26				.map(move |to| (from, to))
27		});
28		self.modify(|mut graph| {
29			for (from, to) in edges {
30				let from_id = graph.fetch_id_for_data(from);
31				let to_id = graph.fetch_id_for_data(to);
32				graph.add_edge(from_id, to_id);
33				graph.add_edge(to_id, from_id);
34			}
35		});
36	}
37
38	/// Runs the textrank algorithm on the graph and returns a HashMap mapping node values to the resulting scores.
39	pub fn run_pagerank(&self, config: &PageRankConfiguration) -> RankedNodes<'_, T> {
40		//TODO: Add a max_iterations value here to prevent too long loops.
41		//      This is likely processing untrusted data.
42
43		// TODO: Add the "bored surfer" part to mitigate score sinks.
44
45		let mut scores = vec![1.0_f32; self.nodes_length];
46
47		loop {
48			let prev_scores = scores.to_owned();
49			let max_diff;
50			(scores, max_diff) = self.iterate_pagerank_once(&prev_scores, config.damping);
51
52			if max_diff < config.tolerance {
53				break;
54			}
55		}
56
57		RankedNodes {
58			graph: self,
59			scores,
60		}
61	}
62
63	/// Perform one pagerank/textrank iteration
64	fn iterate_pagerank_once(&self, prev_scores: &[f32], damping: f32) -> (Vec<f32>, f32) {
65		let mut new_scores = Vec::with_capacity(prev_scores.len());
66		let mut max_diff = 0.0_f32;
67		for (i, incoming_edges) in self.graph.iter().enumerate() {
68			let new_score = self.pagerank_score_node(incoming_edges, prev_scores, damping);
69			let diff = (prev_scores[i] - new_score).abs();
70			if diff > max_diff {
71				max_diff = diff;
72			}
73			new_scores.push(new_score);
74		}
75		(new_scores, max_diff)
76	}
77
78	/// computes the new score for one node by **pulling in** the values from its edges.
79	fn pagerank_score_node(
80		&self,
81		incoming_edges: &[(NodeId, f32)],
82		prev_scores: &[f32],
83		damping: f32,
84	) -> f32 {
85		let new_score = incoming_edges
86			.iter()
87			.map(|(from, weight)| {
88				let from_outgoing_sum = self.outgoing_weight_sums[from.0];
89				// The `R(v) / N_v` part of the original pagerank equation, times weight
90				(prev_scores[from.0] / from_outgoing_sum) * weight
91			})
92			.sum::<f32>();
93
94		(1.0 - damping) + damping * new_score
95	}
96}