Skip to main content

manifoldb_graph/analytics/
eigenvector.rs

1//! Eigenvector Centrality implementation.
2//!
3//! Eigenvector centrality measures node importance based on connections to
4//! important nodes. A node is important if it's connected to other important
5//! nodes. This creates a recursive definition that can be computed using
6//! the power iteration method.
7//!
8//! # Algorithm
9//!
10//! This module implements eigenvector centrality using the power iteration
11//! method to find the dominant eigenvector of the adjacency matrix.
12//!
13//! # Formula
14//!
15//! For node v:
16//! EC(v) = (1/λ) * Σ EC(u) for all u connected to v
17//!
18//! where λ is the largest eigenvalue of the adjacency matrix.
19//!
20//! The algorithm iteratively updates scores until convergence:
21//! x(k+1) = A * x(k) / ||A * x(k)||
22//!
23//! # Example
24//!
25//! ```ignore
26//! use manifoldb_graph::analytics::{EigenvectorCentrality, EigenvectorCentralityConfig};
27//!
28//! let config = EigenvectorCentralityConfig::default()
29//!     .with_tolerance(1e-6)
30//!     .with_max_iterations(100);
31//! let result = EigenvectorCentrality::compute(&tx, &config)?;
32//!
33//! // Find the most important nodes
34//! for (node, score) in result.top_n(10) {
35//!     println!("Node {:?} has eigenvector centrality {:.4}", node, score);
36//! }
37//! ```
38
39use std::collections::HashMap;
40
41use manifoldb_core::EntityId;
42use manifoldb_storage::Transaction;
43
44use crate::index::AdjacencyIndex;
45use crate::store::{EdgeStore, GraphError, GraphResult, NodeStore};
46use crate::traversal::Direction;
47
48use super::pagerank::DEFAULT_MAX_GRAPH_NODES;
49
50/// Configuration for Eigenvector Centrality computation.
51#[derive(Debug, Clone)]
52pub struct EigenvectorCentralityConfig {
53    /// Direction of edges to follow.
54    /// Default: Both (treat as undirected)
55    pub direction: Direction,
56
57    /// Maximum number of iterations before stopping.
58    /// Default: 100
59    pub max_iterations: usize,
60
61    /// Convergence tolerance. Algorithm stops when max score change < tolerance.
62    /// Default: 1e-6
63    pub tolerance: f64,
64
65    /// Whether to normalize scores to have unit L2 norm.
66    /// Default: true
67    pub normalize: bool,
68
69    /// Maximum number of nodes allowed before returning an error.
70    /// Set to `None` to disable the check.
71    /// Default: 10,000,000 (10M nodes)
72    pub max_graph_nodes: Option<usize>,
73}
74
75impl Default for EigenvectorCentralityConfig {
76    fn default() -> Self {
77        Self {
78            direction: Direction::Both,
79            max_iterations: 100,
80            tolerance: 1e-6,
81            normalize: true,
82            max_graph_nodes: Some(DEFAULT_MAX_GRAPH_NODES),
83        }
84    }
85}
86
87impl EigenvectorCentralityConfig {
88    /// Create a new configuration with default values.
89    pub fn new() -> Self {
90        Self::default()
91    }
92
93    /// Set the direction for following edges.
94    ///
95    /// - `Outgoing`: Follow edges in their natural direction
96    /// - `Incoming`: Follow edges in reverse
97    /// - `Both`: Treat graph as undirected (default)
98    pub const fn with_direction(mut self, direction: Direction) -> Self {
99        self.direction = direction;
100        self
101    }
102
103    /// Set the maximum number of iterations.
104    pub const fn with_max_iterations(mut self, max_iterations: usize) -> Self {
105        self.max_iterations = max_iterations;
106        self
107    }
108
109    /// Set the convergence tolerance.
110    ///
111    /// The algorithm stops when the maximum change in any node's score
112    /// between iterations is less than this value.
113    pub const fn with_tolerance(mut self, tolerance: f64) -> Self {
114        self.tolerance = tolerance;
115        self
116    }
117
118    /// Set whether to normalize scores to have unit L2 norm.
119    pub const fn with_normalize(mut self, normalize: bool) -> Self {
120        self.normalize = normalize;
121        self
122    }
123
124    /// Set the maximum number of nodes allowed.
125    ///
126    /// If the graph has more nodes than this limit, the algorithm will
127    /// return a [`GraphError::GraphTooLarge`] error.
128    ///
129    /// Set to `None` to disable the check (use with caution).
130    ///
131    /// [`GraphError::GraphTooLarge`]: crate::store::GraphError::GraphTooLarge
132    pub const fn with_max_graph_nodes(mut self, limit: Option<usize>) -> Self {
133        self.max_graph_nodes = limit;
134        self
135    }
136}
137
138/// Result of Eigenvector Centrality computation.
139#[derive(Debug, Clone)]
140pub struct EigenvectorCentralityResult {
141    /// Eigenvector centrality scores for each node.
142    pub scores: HashMap<EntityId, f64>,
143
144    /// Number of iterations performed.
145    pub iterations: usize,
146
147    /// Whether the algorithm converged within tolerance.
148    pub converged: bool,
149
150    /// Final convergence delta (max change in last iteration).
151    pub final_delta: f64,
152}
153
154impl EigenvectorCentralityResult {
155    /// Get the eigenvector centrality score for a specific node.
156    pub fn score(&self, node: EntityId) -> Option<f64> {
157        self.scores.get(&node).copied()
158    }
159
160    /// Get nodes sorted by eigenvector centrality (descending).
161    pub fn sorted(&self) -> Vec<(EntityId, f64)> {
162        let mut pairs: Vec<_> = self.scores.iter().map(|(&id, &score)| (id, score)).collect();
163        pairs.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
164        pairs
165    }
166
167    /// Get the top N nodes by eigenvector centrality.
168    pub fn top_n(&self, n: usize) -> Vec<(EntityId, f64)> {
169        self.sorted().into_iter().take(n).collect()
170    }
171
172    /// Get the node with the highest eigenvector centrality.
173    pub fn max(&self) -> Option<(EntityId, f64)> {
174        self.scores
175            .iter()
176            .max_by(|a, b| a.1.partial_cmp(b.1).unwrap_or(std::cmp::Ordering::Equal))
177            .map(|(&id, &score)| (id, score))
178    }
179
180    /// Get the node with the lowest eigenvector centrality.
181    pub fn min(&self) -> Option<(EntityId, f64)> {
182        self.scores
183            .iter()
184            .min_by(|a, b| a.1.partial_cmp(b.1).unwrap_or(std::cmp::Ordering::Equal))
185            .map(|(&id, &score)| (id, score))
186    }
187
188    /// Get the mean eigenvector centrality.
189    pub fn mean(&self) -> f64 {
190        if self.scores.is_empty() {
191            return 0.0;
192        }
193        self.scores.values().sum::<f64>() / self.scores.len() as f64
194    }
195}
196
197/// Eigenvector Centrality algorithm implementation.
198///
199/// Eigenvector centrality assigns importance scores to nodes based on
200/// connections to important nodes. The algorithm uses the power iteration
201/// method to compute the dominant eigenvector of the adjacency matrix.
202pub struct EigenvectorCentrality;
203
204impl EigenvectorCentrality {
205    /// Compute eigenvector centrality for all nodes in the graph.
206    ///
207    /// # Arguments
208    ///
209    /// * `tx` - The transaction to use for graph access
210    /// * `config` - Configuration parameters for the algorithm
211    ///
212    /// # Returns
213    ///
214    /// An `EigenvectorCentralityResult` containing scores for all nodes.
215    ///
216    /// # Algorithm
217    ///
218    /// Uses the power iteration method:
219    /// 1. Initialize all nodes with score 1/√n
220    /// 2. Iteratively update scores: x(k+1) = A * x(k)
221    /// 3. Normalize after each iteration
222    /// 4. Check for convergence
223    pub fn compute<T: Transaction>(
224        tx: &T,
225        config: &EigenvectorCentralityConfig,
226    ) -> GraphResult<EigenvectorCentralityResult> {
227        // Check graph size before allocating large data structures
228        if let Some(limit) = config.max_graph_nodes {
229            let node_count = NodeStore::count(tx)?;
230            if node_count > limit {
231                return Err(GraphError::GraphTooLarge { node_count, limit });
232            }
233        }
234
235        // Collect all nodes
236        let mut nodes: Vec<EntityId> = Vec::new();
237        NodeStore::for_each(tx, |entity| {
238            nodes.push(entity.id);
239            true
240        })?;
241
242        let n = nodes.len();
243        if n == 0 {
244            return Ok(EigenvectorCentralityResult {
245                scores: HashMap::new(),
246                iterations: 0,
247                converged: true,
248                final_delta: 0.0,
249            });
250        }
251
252        // Build node index for fast lookup
253        let mut node_index: HashMap<EntityId, usize> = HashMap::with_capacity(n);
254        for (i, &id) in nodes.iter().enumerate() {
255            node_index.insert(id, i);
256        }
257
258        // Build adjacency lists
259        const AVG_DEGREE_ESTIMATE: usize = 8;
260        let mut neighbors: Vec<Vec<usize>> =
261            (0..n).map(|_| Vec::with_capacity(AVG_DEGREE_ESTIMATE)).collect();
262        for (i, &node) in nodes.iter().enumerate() {
263            let neighbor_ids = Self::get_neighbors(tx, node, config.direction)?;
264            for neighbor_id in neighbor_ids {
265                if let Some(&j) = node_index.get(&neighbor_id) {
266                    neighbors[i].push(j);
267                }
268            }
269        }
270
271        // Initialize scores with uniform distribution
272        let initial_score = 1.0 / (n as f64).sqrt();
273        let mut scores: Vec<f64> = vec![initial_score; n];
274        let mut new_scores: Vec<f64> = vec![0.0; n];
275
276        let mut iterations = 0;
277        let mut converged = false;
278        let mut final_delta = f64::MAX;
279
280        // Power iteration
281        while iterations < config.max_iterations {
282            iterations += 1;
283
284            // Reset new scores
285            new_scores.fill(0.0);
286
287            // Compute new scores: x(k+1) = A * x(k)
288            // For each node, sum the scores of its neighbors
289            for (i, neighbor_list) in neighbors.iter().enumerate() {
290                for &j in neighbor_list {
291                    new_scores[i] += scores[j];
292                }
293            }
294
295            // Compute L2 norm for normalization
296            let norm: f64 = new_scores.iter().map(|&x| x * x).sum::<f64>().sqrt();
297
298            // Handle the case where all scores are zero (disconnected graph)
299            if norm < f64::EPSILON {
300                // Fall back to uniform distribution
301                let uniform_score = 1.0 / (n as f64).sqrt();
302                new_scores.fill(uniform_score);
303            } else {
304                // Normalize to unit L2 norm
305                for score in &mut new_scores {
306                    *score /= norm;
307                }
308            }
309
310            // Check convergence
311            let mut max_delta = 0.0f64;
312            for i in 0..n {
313                let delta = (new_scores[i] - scores[i]).abs();
314                max_delta = max_delta.max(delta);
315            }
316
317            final_delta = max_delta;
318
319            // Swap scores
320            std::mem::swap(&mut scores, &mut new_scores);
321
322            if max_delta < config.tolerance {
323                converged = true;
324                break;
325            }
326        }
327
328        // Final normalization if not already normalized
329        if config.normalize {
330            let norm: f64 = scores.iter().map(|&x| x * x).sum::<f64>().sqrt();
331            if norm > f64::EPSILON {
332                for score in &mut scores {
333                    *score /= norm;
334                }
335            }
336        }
337
338        // Build result map
339        let scores_map: HashMap<EntityId, f64> = nodes.into_iter().zip(scores).collect();
340
341        Ok(EigenvectorCentralityResult { scores: scores_map, iterations, converged, final_delta })
342    }
343
344    /// Compute eigenvector centrality for a subset of nodes.
345    ///
346    /// Only considers edges within the specified subgraph.
347    ///
348    /// # Arguments
349    ///
350    /// * `tx` - The transaction to use for graph access
351    /// * `nodes` - The nodes to include in the computation
352    /// * `config` - Configuration parameters for the algorithm
353    pub fn compute_for_nodes<T: Transaction>(
354        tx: &T,
355        nodes: &[EntityId],
356        config: &EigenvectorCentralityConfig,
357    ) -> GraphResult<EigenvectorCentralityResult> {
358        let n = nodes.len();
359        if n == 0 {
360            return Ok(EigenvectorCentralityResult {
361                scores: HashMap::new(),
362                iterations: 0,
363                converged: true,
364                final_delta: 0.0,
365            });
366        }
367
368        // Build node index and set for fast lookup
369        let node_set: std::collections::HashSet<EntityId> = nodes.iter().copied().collect();
370        let mut node_index: HashMap<EntityId, usize> = HashMap::with_capacity(n);
371        for (i, &id) in nodes.iter().enumerate() {
372            node_index.insert(id, i);
373        }
374
375        // Build adjacency lists (only including edges within the subgraph)
376        const AVG_DEGREE_ESTIMATE: usize = 8;
377        let mut neighbors: Vec<Vec<usize>> =
378            (0..n).map(|_| Vec::with_capacity(AVG_DEGREE_ESTIMATE)).collect();
379        for (i, &node) in nodes.iter().enumerate() {
380            let neighbor_ids = Self::get_neighbors(tx, node, config.direction)?;
381            for neighbor_id in neighbor_ids {
382                if node_set.contains(&neighbor_id) {
383                    if let Some(&j) = node_index.get(&neighbor_id) {
384                        neighbors[i].push(j);
385                    }
386                }
387            }
388        }
389
390        // Initialize scores
391        let initial_score = 1.0 / (n as f64).sqrt();
392        let mut scores: Vec<f64> = vec![initial_score; n];
393        let mut new_scores: Vec<f64> = vec![0.0; n];
394
395        let mut iterations = 0;
396        let mut converged = false;
397        let mut final_delta = f64::MAX;
398
399        // Power iteration
400        while iterations < config.max_iterations {
401            iterations += 1;
402
403            new_scores.fill(0.0);
404
405            for (i, neighbor_list) in neighbors.iter().enumerate() {
406                for &j in neighbor_list {
407                    new_scores[i] += scores[j];
408                }
409            }
410
411            let norm: f64 = new_scores.iter().map(|&x| x * x).sum::<f64>().sqrt();
412
413            if norm < f64::EPSILON {
414                let uniform_score = 1.0 / (n as f64).sqrt();
415                new_scores.fill(uniform_score);
416            } else {
417                for score in &mut new_scores {
418                    *score /= norm;
419                }
420            }
421
422            let mut max_delta = 0.0f64;
423            for i in 0..n {
424                let delta = (new_scores[i] - scores[i]).abs();
425                max_delta = max_delta.max(delta);
426            }
427
428            final_delta = max_delta;
429            std::mem::swap(&mut scores, &mut new_scores);
430
431            if max_delta < config.tolerance {
432                converged = true;
433                break;
434            }
435        }
436
437        if config.normalize {
438            let norm: f64 = scores.iter().map(|&x| x * x).sum::<f64>().sqrt();
439            if norm > f64::EPSILON {
440                for score in &mut scores {
441                    *score /= norm;
442                }
443            }
444        }
445
446        let scores_map: HashMap<EntityId, f64> = nodes.iter().copied().zip(scores).collect();
447
448        Ok(EigenvectorCentralityResult { scores: scores_map, iterations, converged, final_delta })
449    }
450
451    /// Get neighbors of a node based on direction.
452    fn get_neighbors<T: Transaction>(
453        tx: &T,
454        node: EntityId,
455        direction: Direction,
456    ) -> GraphResult<Vec<EntityId>> {
457        const INITIAL_NEIGHBORS_CAPACITY: usize = 16;
458        let mut neighbors = Vec::with_capacity(INITIAL_NEIGHBORS_CAPACITY);
459
460        if direction.includes_outgoing() {
461            let edges = AdjacencyIndex::get_outgoing_edge_ids(tx, node)?;
462            for edge_id in edges {
463                if let Some(edge) = EdgeStore::get(tx, edge_id)? {
464                    neighbors.push(edge.target);
465                }
466            }
467        }
468
469        if direction.includes_incoming() {
470            let edges = AdjacencyIndex::get_incoming_edge_ids(tx, node)?;
471            for edge_id in edges {
472                if let Some(edge) = EdgeStore::get(tx, edge_id)? {
473                    neighbors.push(edge.source);
474                }
475            }
476        }
477
478        Ok(neighbors)
479    }
480}
481
482#[cfg(test)]
483mod tests {
484    use super::*;
485
486    #[test]
487    fn config_defaults() {
488        let config = EigenvectorCentralityConfig::default();
489        assert_eq!(config.direction, Direction::Both);
490        assert_eq!(config.max_iterations, 100);
491        assert!((config.tolerance - 1e-6).abs() < f64::EPSILON);
492        assert!(config.normalize);
493        assert_eq!(config.max_graph_nodes, Some(DEFAULT_MAX_GRAPH_NODES));
494    }
495
496    #[test]
497    fn config_builder() {
498        let config = EigenvectorCentralityConfig::new()
499            .with_direction(Direction::Outgoing)
500            .with_max_iterations(50)
501            .with_tolerance(1e-8)
502            .with_normalize(false)
503            .with_max_graph_nodes(Some(1000));
504
505        assert_eq!(config.direction, Direction::Outgoing);
506        assert_eq!(config.max_iterations, 50);
507        assert!((config.tolerance - 1e-8).abs() < f64::EPSILON);
508        assert!(!config.normalize);
509        assert_eq!(config.max_graph_nodes, Some(1000));
510    }
511
512    #[test]
513    fn result_empty() {
514        let result = EigenvectorCentralityResult {
515            scores: HashMap::new(),
516            iterations: 0,
517            converged: true,
518            final_delta: 0.0,
519        };
520
521        assert!(result.score(EntityId::new(1)).is_none());
522        assert!(result.sorted().is_empty());
523        assert!(result.top_n(10).is_empty());
524        assert!(result.max().is_none());
525        assert!(result.min().is_none());
526        assert!((result.mean() - 0.0).abs() < f64::EPSILON);
527    }
528
529    #[test]
530    fn result_sorted() {
531        let mut scores = HashMap::new();
532        scores.insert(EntityId::new(1), 0.3);
533        scores.insert(EntityId::new(2), 0.5);
534        scores.insert(EntityId::new(3), 0.2);
535
536        let result = EigenvectorCentralityResult {
537            scores,
538            iterations: 10,
539            converged: true,
540            final_delta: 1e-7,
541        };
542
543        let sorted = result.sorted();
544        assert_eq!(sorted.len(), 3);
545        assert_eq!(sorted[0].0, EntityId::new(2)); // highest
546        assert_eq!(sorted[1].0, EntityId::new(1));
547        assert_eq!(sorted[2].0, EntityId::new(3)); // lowest
548    }
549
550    #[test]
551    fn result_top_n() {
552        let mut scores = HashMap::new();
553        scores.insert(EntityId::new(1), 0.3);
554        scores.insert(EntityId::new(2), 0.5);
555        scores.insert(EntityId::new(3), 0.2);
556
557        let result = EigenvectorCentralityResult {
558            scores,
559            iterations: 10,
560            converged: true,
561            final_delta: 1e-7,
562        };
563
564        let top2 = result.top_n(2);
565        assert_eq!(top2.len(), 2);
566        assert_eq!(top2[0].0, EntityId::new(2));
567        assert_eq!(top2[1].0, EntityId::new(1));
568    }
569
570    #[test]
571    fn result_max_min() {
572        let mut scores = HashMap::new();
573        scores.insert(EntityId::new(1), 0.3);
574        scores.insert(EntityId::new(2), 0.5);
575        scores.insert(EntityId::new(3), 0.2);
576
577        let result = EigenvectorCentralityResult {
578            scores,
579            iterations: 10,
580            converged: true,
581            final_delta: 1e-7,
582        };
583
584        let max = result.max().unwrap();
585        assert_eq!(max.0, EntityId::new(2));
586        assert!((max.1 - 0.5).abs() < f64::EPSILON);
587
588        let min = result.min().unwrap();
589        assert_eq!(min.0, EntityId::new(3));
590        assert!((min.1 - 0.2).abs() < f64::EPSILON);
591    }
592
593    #[test]
594    fn result_mean() {
595        let mut scores = HashMap::new();
596        scores.insert(EntityId::new(1), 0.3);
597        scores.insert(EntityId::new(2), 0.6);
598        scores.insert(EntityId::new(3), 0.9);
599
600        let result = EigenvectorCentralityResult {
601            scores,
602            iterations: 10,
603            converged: true,
604            final_delta: 1e-7,
605        };
606
607        assert!((result.mean() - 0.6).abs() < f64::EPSILON);
608    }
609}