manifoldb_graph/analytics/
pagerank.rs1use std::collections::HashMap;
45
46use manifoldb_core::EntityId;
47use manifoldb_storage::Transaction;
48
49use crate::index::AdjacencyIndex;
50use crate::store::{EdgeStore, GraphError, GraphResult, NodeStore};
51
52pub const DEFAULT_MAX_GRAPH_NODES: usize = 10_000_000;
55
56#[derive(Debug, Clone)]
58pub struct PageRankConfig {
59 pub damping_factor: f64,
62
63 pub max_iterations: usize,
66
67 pub tolerance: f64,
70
71 pub normalize: bool,
74
75 pub max_graph_nodes: Option<usize>,
79}
80
81impl Default for PageRankConfig {
82 fn default() -> Self {
83 Self {
84 damping_factor: 0.85,
85 max_iterations: 100,
86 tolerance: 1e-6,
87 normalize: true,
88 max_graph_nodes: Some(DEFAULT_MAX_GRAPH_NODES),
89 }
90 }
91}
92
93impl PageRankConfig {
94 pub fn new() -> Self {
96 Self::default()
97 }
98
99 pub const fn with_damping_factor(mut self, damping_factor: f64) -> Self {
105 self.damping_factor = damping_factor;
106 self
107 }
108
109 pub const fn with_max_iterations(mut self, max_iterations: usize) -> Self {
111 self.max_iterations = max_iterations;
112 self
113 }
114
115 pub const fn with_tolerance(mut self, tolerance: f64) -> Self {
120 self.tolerance = tolerance;
121 self
122 }
123
124 pub const fn with_normalize(mut self, normalize: bool) -> Self {
126 self.normalize = normalize;
127 self
128 }
129
130 pub const fn with_max_graph_nodes(mut self, limit: Option<usize>) -> Self {
140 self.max_graph_nodes = limit;
141 self
142 }
143}
144
145#[derive(Debug, Clone)]
147pub struct PageRankResult {
148 pub scores: HashMap<EntityId, f64>,
150
151 pub iterations: usize,
153
154 pub converged: bool,
156
157 pub final_delta: f64,
159}
160
161impl PageRankResult {
162 pub fn score(&self, node: EntityId) -> Option<f64> {
164 self.scores.get(&node).copied()
165 }
166
167 pub fn sorted(&self) -> Vec<(EntityId, f64)> {
169 let mut pairs: Vec<_> = self.scores.iter().map(|(&id, &score)| (id, score)).collect();
170 pairs.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
171 pairs
172 }
173
174 pub fn top_n(&self, n: usize) -> Vec<(EntityId, f64)> {
176 self.sorted().into_iter().take(n).collect()
177 }
178
179 pub fn max(&self) -> Option<(EntityId, f64)> {
181 self.scores
182 .iter()
183 .max_by(|a, b| a.1.partial_cmp(b.1).unwrap_or(std::cmp::Ordering::Equal))
184 .map(|(&id, &score)| (id, score))
185 }
186
187 pub fn min(&self) -> Option<(EntityId, f64)> {
189 self.scores
190 .iter()
191 .min_by(|a, b| a.1.partial_cmp(b.1).unwrap_or(std::cmp::Ordering::Equal))
192 .map(|(&id, &score)| (id, score))
193 }
194}
195
196pub struct PageRank;
202
203impl PageRank {
204 pub fn compute<T: Transaction>(tx: &T, config: &PageRankConfig) -> GraphResult<PageRankResult> {
223 if let Some(limit) = config.max_graph_nodes {
225 let node_count = NodeStore::count(tx)?;
226 if node_count > limit {
227 return Err(GraphError::GraphTooLarge { node_count, limit });
228 }
229 }
230
231 let mut nodes: Vec<EntityId> = Vec::new();
233 NodeStore::for_each(tx, |entity| {
234 nodes.push(entity.id);
235 true
236 })?;
237
238 let n = nodes.len();
239 if n == 0 {
240 return Ok(PageRankResult {
241 scores: HashMap::new(),
242 iterations: 0,
243 converged: true,
244 final_delta: 0.0,
245 });
246 }
247
248 let mut node_index: HashMap<EntityId, usize> = HashMap::with_capacity(n);
250 for (i, &id) in nodes.iter().enumerate() {
251 node_index.insert(id, i);
252 }
253
254 const AVG_DEGREE_ESTIMATE: usize = 8;
257 let mut out_neighbors: Vec<Vec<usize>> =
258 (0..n).map(|_| Vec::with_capacity(AVG_DEGREE_ESTIMATE)).collect();
259 let mut out_degrees: Vec<usize> = vec![0; n];
260
261 for (i, &node) in nodes.iter().enumerate() {
262 let outgoing = AdjacencyIndex::get_outgoing_edge_ids(tx, node)?;
264 out_degrees[i] = outgoing.len();
265
266 for edge_id in outgoing {
267 if let Some(edge) = EdgeStore::get(tx, edge_id)? {
268 if let Some(&target_idx) = node_index.get(&edge.target) {
269 out_neighbors[i].push(target_idx);
270 }
271 }
272 }
273 }
274
275 let mut in_neighbors: Vec<Vec<usize>> =
278 (0..n).map(|_| Vec::with_capacity(AVG_DEGREE_ESTIMATE)).collect();
279 for (i, neighbors) in out_neighbors.iter().enumerate() {
280 for &j in neighbors {
281 in_neighbors[j].push(i);
282 }
283 }
284
285 let initial_score = 1.0 / n as f64;
287 let mut scores: Vec<f64> = vec![initial_score; n];
288 let mut new_scores: Vec<f64> = vec![0.0; n];
289
290 let d = config.damping_factor;
291 let base_score = (1.0 - d) / n as f64;
292
293 let mut iterations = 0;
294 let mut converged = false;
295 let mut final_delta = f64::MAX;
296
297 while iterations < config.max_iterations {
299 iterations += 1;
300
301 let dangling_sum: f64 = nodes
303 .iter()
304 .enumerate()
305 .filter(|(i, _)| out_degrees[*i] == 0)
306 .map(|(i, _)| scores[i])
307 .sum();
308 let dangling_contribution = d * dangling_sum / n as f64;
309
310 for (i, incoming) in in_neighbors.iter().enumerate() {
312 let mut link_sum = 0.0;
313 for &j in incoming {
314 if out_degrees[j] > 0 {
315 link_sum += scores[j] / out_degrees[j] as f64;
316 }
317 }
318 new_scores[i] = base_score + d * link_sum + dangling_contribution;
319 }
320
321 let mut max_delta = 0.0f64;
323 for i in 0..n {
324 let delta = (new_scores[i] - scores[i]).abs();
325 max_delta = max_delta.max(delta);
326 }
327
328 final_delta = max_delta;
329
330 std::mem::swap(&mut scores, &mut new_scores);
332
333 if max_delta < config.tolerance {
334 converged = true;
335 break;
336 }
337 }
338
339 if config.normalize {
341 let total: f64 = scores.iter().sum();
342 if total > 0.0 {
343 for score in &mut scores {
344 *score /= total;
345 }
346 }
347 }
348
349 let scores_map: HashMap<EntityId, f64> = nodes.into_iter().zip(scores).collect();
351
352 Ok(PageRankResult { scores: scores_map, iterations, converged, final_delta })
353 }
354
355 pub fn compute_for_nodes<T: Transaction>(
366 tx: &T,
367 nodes: &[EntityId],
368 config: &PageRankConfig,
369 ) -> GraphResult<PageRankResult> {
370 let n = nodes.len();
371 if n == 0 {
372 return Ok(PageRankResult {
373 scores: HashMap::new(),
374 iterations: 0,
375 converged: true,
376 final_delta: 0.0,
377 });
378 }
379
380 let mut node_set: std::collections::HashSet<EntityId> =
382 std::collections::HashSet::with_capacity(n);
383 for &id in nodes {
384 node_set.insert(id);
385 }
386 let mut node_index: HashMap<EntityId, usize> = HashMap::with_capacity(n);
387 for (i, &id) in nodes.iter().enumerate() {
388 node_index.insert(id, i);
389 }
390
391 const AVG_DEGREE_ESTIMATE: usize = 8;
394 let mut out_neighbors: Vec<Vec<usize>> =
395 (0..n).map(|_| Vec::with_capacity(AVG_DEGREE_ESTIMATE)).collect();
396 let mut out_degrees: Vec<usize> = vec![0; n];
397
398 for (i, &node) in nodes.iter().enumerate() {
399 let outgoing = AdjacencyIndex::get_outgoing_edge_ids(tx, node)?;
400
401 for edge_id in outgoing {
402 if let Some(edge) = EdgeStore::get(tx, edge_id)? {
403 if node_set.contains(&edge.target) {
405 if let Some(&target_idx) = node_index.get(&edge.target) {
406 out_neighbors[i].push(target_idx);
407 out_degrees[i] += 1;
408 }
409 }
410 }
411 }
412 }
413
414 let mut in_neighbors: Vec<Vec<usize>> =
417 (0..n).map(|_| Vec::with_capacity(AVG_DEGREE_ESTIMATE)).collect();
418 for (i, neighbors) in out_neighbors.iter().enumerate() {
419 for &j in neighbors {
420 in_neighbors[j].push(i);
421 }
422 }
423
424 let initial_score = 1.0 / n as f64;
426 let mut scores: Vec<f64> = vec![initial_score; n];
427 let mut new_scores: Vec<f64> = vec![0.0; n];
428
429 let d = config.damping_factor;
430 let base_score = (1.0 - d) / n as f64;
431
432 let mut iterations = 0;
433 let mut converged = false;
434 let mut final_delta = f64::MAX;
435
436 while iterations < config.max_iterations {
437 iterations += 1;
438
439 let dangling_sum: f64 =
441 (0..n).filter(|&i| out_degrees[i] == 0).map(|i| scores[i]).sum();
442 let dangling_contribution = d * dangling_sum / n as f64;
443
444 for (i, incoming) in in_neighbors.iter().enumerate() {
446 let mut link_sum = 0.0;
447 for &j in incoming {
448 if out_degrees[j] > 0 {
449 link_sum += scores[j] / out_degrees[j] as f64;
450 }
451 }
452 new_scores[i] = base_score + d * link_sum + dangling_contribution;
453 }
454
455 let mut max_delta = 0.0f64;
457 for i in 0..n {
458 let delta = (new_scores[i] - scores[i]).abs();
459 max_delta = max_delta.max(delta);
460 }
461
462 final_delta = max_delta;
463 std::mem::swap(&mut scores, &mut new_scores);
464
465 if max_delta < config.tolerance {
466 converged = true;
467 break;
468 }
469 }
470
471 if config.normalize {
473 let total: f64 = scores.iter().sum();
474 if total > 0.0 {
475 for score in &mut scores {
476 *score /= total;
477 }
478 }
479 }
480
481 let scores_map: HashMap<EntityId, f64> = nodes.iter().copied().zip(scores).collect();
483
484 Ok(PageRankResult { scores: scores_map, iterations, converged, final_delta })
485 }
486}
487
488#[cfg(test)]
489mod tests {
490 use super::*;
491
492 #[test]
493 fn config_defaults() {
494 let config = PageRankConfig::default();
495 assert!((config.damping_factor - 0.85).abs() < f64::EPSILON);
496 assert_eq!(config.max_iterations, 100);
497 assert!((config.tolerance - 1e-6).abs() < f64::EPSILON);
498 assert!(config.normalize);
499 }
500
501 #[test]
502 fn config_builder() {
503 let config = PageRankConfig::new()
504 .with_damping_factor(0.9)
505 .with_max_iterations(50)
506 .with_tolerance(1e-8)
507 .with_normalize(false);
508
509 assert!((config.damping_factor - 0.9).abs() < f64::EPSILON);
510 assert_eq!(config.max_iterations, 50);
511 assert!((config.tolerance - 1e-8).abs() < f64::EPSILON);
512 assert!(!config.normalize);
513 }
514
515 #[test]
516 fn result_empty() {
517 let result = PageRankResult {
518 scores: HashMap::new(),
519 iterations: 0,
520 converged: true,
521 final_delta: 0.0,
522 };
523
524 assert!(result.score(EntityId::new(1)).is_none());
525 assert!(result.sorted().is_empty());
526 assert!(result.top_n(10).is_empty());
527 assert!(result.max().is_none());
528 assert!(result.min().is_none());
529 }
530
531 #[test]
532 fn result_sorted() {
533 let mut scores = HashMap::new();
534 scores.insert(EntityId::new(1), 0.3);
535 scores.insert(EntityId::new(2), 0.5);
536 scores.insert(EntityId::new(3), 0.2);
537
538 let result = PageRankResult { scores, iterations: 10, converged: true, final_delta: 1e-7 };
539
540 let sorted = result.sorted();
541 assert_eq!(sorted.len(), 3);
542 assert_eq!(sorted[0].0, EntityId::new(2)); assert_eq!(sorted[1].0, EntityId::new(1));
544 assert_eq!(sorted[2].0, EntityId::new(3)); }
546
547 #[test]
548 fn result_top_n() {
549 let mut scores = HashMap::new();
550 scores.insert(EntityId::new(1), 0.3);
551 scores.insert(EntityId::new(2), 0.5);
552 scores.insert(EntityId::new(3), 0.2);
553
554 let result = PageRankResult { scores, iterations: 10, converged: true, final_delta: 1e-7 };
555
556 let top2 = result.top_n(2);
557 assert_eq!(top2.len(), 2);
558 assert_eq!(top2[0].0, EntityId::new(2));
559 assert_eq!(top2[1].0, EntityId::new(1));
560 }
561
562 #[test]
563 fn result_max_min() {
564 let mut scores = HashMap::new();
565 scores.insert(EntityId::new(1), 0.3);
566 scores.insert(EntityId::new(2), 0.5);
567 scores.insert(EntityId::new(3), 0.2);
568
569 let result = PageRankResult { scores, iterations: 10, converged: true, final_delta: 1e-7 };
570
571 let max = result.max().unwrap();
572 assert_eq!(max.0, EntityId::new(2));
573 assert!((max.1 - 0.5).abs() < f64::EPSILON);
574
575 let min = result.min().unwrap();
576 assert_eq!(min.0, EntityId::new(3));
577 assert!((min.1 - 0.2).abs() < f64::EPSILON);
578 }
579}