manifoldb_graph/analytics/
eigenvector.rs1use std::collections::HashMap;
40
41use manifoldb_core::EntityId;
42use manifoldb_storage::Transaction;
43
44use crate::index::AdjacencyIndex;
45use crate::store::{EdgeStore, GraphError, GraphResult, NodeStore};
46use crate::traversal::Direction;
47
48use super::pagerank::DEFAULT_MAX_GRAPH_NODES;
49
50#[derive(Debug, Clone)]
52pub struct EigenvectorCentralityConfig {
53 pub direction: Direction,
56
57 pub max_iterations: usize,
60
61 pub tolerance: f64,
64
65 pub normalize: bool,
68
69 pub max_graph_nodes: Option<usize>,
73}
74
75impl Default for EigenvectorCentralityConfig {
76 fn default() -> Self {
77 Self {
78 direction: Direction::Both,
79 max_iterations: 100,
80 tolerance: 1e-6,
81 normalize: true,
82 max_graph_nodes: Some(DEFAULT_MAX_GRAPH_NODES),
83 }
84 }
85}
86
87impl EigenvectorCentralityConfig {
88 pub fn new() -> Self {
90 Self::default()
91 }
92
93 pub const fn with_direction(mut self, direction: Direction) -> Self {
99 self.direction = direction;
100 self
101 }
102
103 pub const fn with_max_iterations(mut self, max_iterations: usize) -> Self {
105 self.max_iterations = max_iterations;
106 self
107 }
108
109 pub const fn with_tolerance(mut self, tolerance: f64) -> Self {
114 self.tolerance = tolerance;
115 self
116 }
117
118 pub const fn with_normalize(mut self, normalize: bool) -> Self {
120 self.normalize = normalize;
121 self
122 }
123
124 pub const fn with_max_graph_nodes(mut self, limit: Option<usize>) -> Self {
133 self.max_graph_nodes = limit;
134 self
135 }
136}
137
138#[derive(Debug, Clone)]
140pub struct EigenvectorCentralityResult {
141 pub scores: HashMap<EntityId, f64>,
143
144 pub iterations: usize,
146
147 pub converged: bool,
149
150 pub final_delta: f64,
152}
153
154impl EigenvectorCentralityResult {
155 pub fn score(&self, node: EntityId) -> Option<f64> {
157 self.scores.get(&node).copied()
158 }
159
160 pub fn sorted(&self) -> Vec<(EntityId, f64)> {
162 let mut pairs: Vec<_> = self.scores.iter().map(|(&id, &score)| (id, score)).collect();
163 pairs.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
164 pairs
165 }
166
167 pub fn top_n(&self, n: usize) -> Vec<(EntityId, f64)> {
169 self.sorted().into_iter().take(n).collect()
170 }
171
172 pub fn max(&self) -> Option<(EntityId, f64)> {
174 self.scores
175 .iter()
176 .max_by(|a, b| a.1.partial_cmp(b.1).unwrap_or(std::cmp::Ordering::Equal))
177 .map(|(&id, &score)| (id, score))
178 }
179
180 pub fn min(&self) -> Option<(EntityId, f64)> {
182 self.scores
183 .iter()
184 .min_by(|a, b| a.1.partial_cmp(b.1).unwrap_or(std::cmp::Ordering::Equal))
185 .map(|(&id, &score)| (id, score))
186 }
187
188 pub fn mean(&self) -> f64 {
190 if self.scores.is_empty() {
191 return 0.0;
192 }
193 self.scores.values().sum::<f64>() / self.scores.len() as f64
194 }
195}
196
197pub struct EigenvectorCentrality;
203
204impl EigenvectorCentrality {
205 pub fn compute<T: Transaction>(
224 tx: &T,
225 config: &EigenvectorCentralityConfig,
226 ) -> GraphResult<EigenvectorCentralityResult> {
227 if let Some(limit) = config.max_graph_nodes {
229 let node_count = NodeStore::count(tx)?;
230 if node_count > limit {
231 return Err(GraphError::GraphTooLarge { node_count, limit });
232 }
233 }
234
235 let mut nodes: Vec<EntityId> = Vec::new();
237 NodeStore::for_each(tx, |entity| {
238 nodes.push(entity.id);
239 true
240 })?;
241
242 let n = nodes.len();
243 if n == 0 {
244 return Ok(EigenvectorCentralityResult {
245 scores: HashMap::new(),
246 iterations: 0,
247 converged: true,
248 final_delta: 0.0,
249 });
250 }
251
252 let mut node_index: HashMap<EntityId, usize> = HashMap::with_capacity(n);
254 for (i, &id) in nodes.iter().enumerate() {
255 node_index.insert(id, i);
256 }
257
258 const AVG_DEGREE_ESTIMATE: usize = 8;
260 let mut neighbors: Vec<Vec<usize>> =
261 (0..n).map(|_| Vec::with_capacity(AVG_DEGREE_ESTIMATE)).collect();
262 for (i, &node) in nodes.iter().enumerate() {
263 let neighbor_ids = Self::get_neighbors(tx, node, config.direction)?;
264 for neighbor_id in neighbor_ids {
265 if let Some(&j) = node_index.get(&neighbor_id) {
266 neighbors[i].push(j);
267 }
268 }
269 }
270
271 let initial_score = 1.0 / (n as f64).sqrt();
273 let mut scores: Vec<f64> = vec![initial_score; n];
274 let mut new_scores: Vec<f64> = vec![0.0; n];
275
276 let mut iterations = 0;
277 let mut converged = false;
278 let mut final_delta = f64::MAX;
279
280 while iterations < config.max_iterations {
282 iterations += 1;
283
284 new_scores.fill(0.0);
286
287 for (i, neighbor_list) in neighbors.iter().enumerate() {
290 for &j in neighbor_list {
291 new_scores[i] += scores[j];
292 }
293 }
294
295 let norm: f64 = new_scores.iter().map(|&x| x * x).sum::<f64>().sqrt();
297
298 if norm < f64::EPSILON {
300 let uniform_score = 1.0 / (n as f64).sqrt();
302 new_scores.fill(uniform_score);
303 } else {
304 for score in &mut new_scores {
306 *score /= norm;
307 }
308 }
309
310 let mut max_delta = 0.0f64;
312 for i in 0..n {
313 let delta = (new_scores[i] - scores[i]).abs();
314 max_delta = max_delta.max(delta);
315 }
316
317 final_delta = max_delta;
318
319 std::mem::swap(&mut scores, &mut new_scores);
321
322 if max_delta < config.tolerance {
323 converged = true;
324 break;
325 }
326 }
327
328 if config.normalize {
330 let norm: f64 = scores.iter().map(|&x| x * x).sum::<f64>().sqrt();
331 if norm > f64::EPSILON {
332 for score in &mut scores {
333 *score /= norm;
334 }
335 }
336 }
337
338 let scores_map: HashMap<EntityId, f64> = nodes.into_iter().zip(scores).collect();
340
341 Ok(EigenvectorCentralityResult { scores: scores_map, iterations, converged, final_delta })
342 }
343
344 pub fn compute_for_nodes<T: Transaction>(
354 tx: &T,
355 nodes: &[EntityId],
356 config: &EigenvectorCentralityConfig,
357 ) -> GraphResult<EigenvectorCentralityResult> {
358 let n = nodes.len();
359 if n == 0 {
360 return Ok(EigenvectorCentralityResult {
361 scores: HashMap::new(),
362 iterations: 0,
363 converged: true,
364 final_delta: 0.0,
365 });
366 }
367
368 let node_set: std::collections::HashSet<EntityId> = nodes.iter().copied().collect();
370 let mut node_index: HashMap<EntityId, usize> = HashMap::with_capacity(n);
371 for (i, &id) in nodes.iter().enumerate() {
372 node_index.insert(id, i);
373 }
374
375 const AVG_DEGREE_ESTIMATE: usize = 8;
377 let mut neighbors: Vec<Vec<usize>> =
378 (0..n).map(|_| Vec::with_capacity(AVG_DEGREE_ESTIMATE)).collect();
379 for (i, &node) in nodes.iter().enumerate() {
380 let neighbor_ids = Self::get_neighbors(tx, node, config.direction)?;
381 for neighbor_id in neighbor_ids {
382 if node_set.contains(&neighbor_id) {
383 if let Some(&j) = node_index.get(&neighbor_id) {
384 neighbors[i].push(j);
385 }
386 }
387 }
388 }
389
390 let initial_score = 1.0 / (n as f64).sqrt();
392 let mut scores: Vec<f64> = vec![initial_score; n];
393 let mut new_scores: Vec<f64> = vec![0.0; n];
394
395 let mut iterations = 0;
396 let mut converged = false;
397 let mut final_delta = f64::MAX;
398
399 while iterations < config.max_iterations {
401 iterations += 1;
402
403 new_scores.fill(0.0);
404
405 for (i, neighbor_list) in neighbors.iter().enumerate() {
406 for &j in neighbor_list {
407 new_scores[i] += scores[j];
408 }
409 }
410
411 let norm: f64 = new_scores.iter().map(|&x| x * x).sum::<f64>().sqrt();
412
413 if norm < f64::EPSILON {
414 let uniform_score = 1.0 / (n as f64).sqrt();
415 new_scores.fill(uniform_score);
416 } else {
417 for score in &mut new_scores {
418 *score /= norm;
419 }
420 }
421
422 let mut max_delta = 0.0f64;
423 for i in 0..n {
424 let delta = (new_scores[i] - scores[i]).abs();
425 max_delta = max_delta.max(delta);
426 }
427
428 final_delta = max_delta;
429 std::mem::swap(&mut scores, &mut new_scores);
430
431 if max_delta < config.tolerance {
432 converged = true;
433 break;
434 }
435 }
436
437 if config.normalize {
438 let norm: f64 = scores.iter().map(|&x| x * x).sum::<f64>().sqrt();
439 if norm > f64::EPSILON {
440 for score in &mut scores {
441 *score /= norm;
442 }
443 }
444 }
445
446 let scores_map: HashMap<EntityId, f64> = nodes.iter().copied().zip(scores).collect();
447
448 Ok(EigenvectorCentralityResult { scores: scores_map, iterations, converged, final_delta })
449 }
450
451 fn get_neighbors<T: Transaction>(
453 tx: &T,
454 node: EntityId,
455 direction: Direction,
456 ) -> GraphResult<Vec<EntityId>> {
457 const INITIAL_NEIGHBORS_CAPACITY: usize = 16;
458 let mut neighbors = Vec::with_capacity(INITIAL_NEIGHBORS_CAPACITY);
459
460 if direction.includes_outgoing() {
461 let edges = AdjacencyIndex::get_outgoing_edge_ids(tx, node)?;
462 for edge_id in edges {
463 if let Some(edge) = EdgeStore::get(tx, edge_id)? {
464 neighbors.push(edge.target);
465 }
466 }
467 }
468
469 if direction.includes_incoming() {
470 let edges = AdjacencyIndex::get_incoming_edge_ids(tx, node)?;
471 for edge_id in edges {
472 if let Some(edge) = EdgeStore::get(tx, edge_id)? {
473 neighbors.push(edge.source);
474 }
475 }
476 }
477
478 Ok(neighbors)
479 }
480}
481
482#[cfg(test)]
483mod tests {
484 use super::*;
485
486 #[test]
487 fn config_defaults() {
488 let config = EigenvectorCentralityConfig::default();
489 assert_eq!(config.direction, Direction::Both);
490 assert_eq!(config.max_iterations, 100);
491 assert!((config.tolerance - 1e-6).abs() < f64::EPSILON);
492 assert!(config.normalize);
493 assert_eq!(config.max_graph_nodes, Some(DEFAULT_MAX_GRAPH_NODES));
494 }
495
496 #[test]
497 fn config_builder() {
498 let config = EigenvectorCentralityConfig::new()
499 .with_direction(Direction::Outgoing)
500 .with_max_iterations(50)
501 .with_tolerance(1e-8)
502 .with_normalize(false)
503 .with_max_graph_nodes(Some(1000));
504
505 assert_eq!(config.direction, Direction::Outgoing);
506 assert_eq!(config.max_iterations, 50);
507 assert!((config.tolerance - 1e-8).abs() < f64::EPSILON);
508 assert!(!config.normalize);
509 assert_eq!(config.max_graph_nodes, Some(1000));
510 }
511
512 #[test]
513 fn result_empty() {
514 let result = EigenvectorCentralityResult {
515 scores: HashMap::new(),
516 iterations: 0,
517 converged: true,
518 final_delta: 0.0,
519 };
520
521 assert!(result.score(EntityId::new(1)).is_none());
522 assert!(result.sorted().is_empty());
523 assert!(result.top_n(10).is_empty());
524 assert!(result.max().is_none());
525 assert!(result.min().is_none());
526 assert!((result.mean() - 0.0).abs() < f64::EPSILON);
527 }
528
529 #[test]
530 fn result_sorted() {
531 let mut scores = HashMap::new();
532 scores.insert(EntityId::new(1), 0.3);
533 scores.insert(EntityId::new(2), 0.5);
534 scores.insert(EntityId::new(3), 0.2);
535
536 let result = EigenvectorCentralityResult {
537 scores,
538 iterations: 10,
539 converged: true,
540 final_delta: 1e-7,
541 };
542
543 let sorted = result.sorted();
544 assert_eq!(sorted.len(), 3);
545 assert_eq!(sorted[0].0, EntityId::new(2)); assert_eq!(sorted[1].0, EntityId::new(1));
547 assert_eq!(sorted[2].0, EntityId::new(3)); }
549
550 #[test]
551 fn result_top_n() {
552 let mut scores = HashMap::new();
553 scores.insert(EntityId::new(1), 0.3);
554 scores.insert(EntityId::new(2), 0.5);
555 scores.insert(EntityId::new(3), 0.2);
556
557 let result = EigenvectorCentralityResult {
558 scores,
559 iterations: 10,
560 converged: true,
561 final_delta: 1e-7,
562 };
563
564 let top2 = result.top_n(2);
565 assert_eq!(top2.len(), 2);
566 assert_eq!(top2[0].0, EntityId::new(2));
567 assert_eq!(top2[1].0, EntityId::new(1));
568 }
569
570 #[test]
571 fn result_max_min() {
572 let mut scores = HashMap::new();
573 scores.insert(EntityId::new(1), 0.3);
574 scores.insert(EntityId::new(2), 0.5);
575 scores.insert(EntityId::new(3), 0.2);
576
577 let result = EigenvectorCentralityResult {
578 scores,
579 iterations: 10,
580 converged: true,
581 final_delta: 1e-7,
582 };
583
584 let max = result.max().unwrap();
585 assert_eq!(max.0, EntityId::new(2));
586 assert!((max.1 - 0.5).abs() < f64::EPSILON);
587
588 let min = result.min().unwrap();
589 assert_eq!(min.0, EntityId::new(3));
590 assert!((min.1 - 0.2).abs() < f64::EPSILON);
591 }
592
593 #[test]
594 fn result_mean() {
595 let mut scores = HashMap::new();
596 scores.insert(EntityId::new(1), 0.3);
597 scores.insert(EntityId::new(2), 0.6);
598 scores.insert(EntityId::new(3), 0.9);
599
600 let result = EigenvectorCentralityResult {
601 scores,
602 iterations: 10,
603 converged: true,
604 final_delta: 1e-7,
605 };
606
607 assert!((result.mean() - 0.6).abs() < f64::EPSILON);
608 }
609}