Skip to main content

manifoldb_graph/analytics/
centrality.rs

1//! Betweenness Centrality implementation using Brandes algorithm.
2//!
3//! Betweenness centrality measures the extent to which a node lies on paths
4//! between other nodes. Nodes with high betweenness centrality act as bridges
5//! or bottlenecks in the network.
6//!
7//! # Algorithm
8//!
9//! This module implements Brandes' algorithm (2001) for efficient computation
10//! of betweenness centrality. The algorithm runs in O(V*E) time for unweighted
11//! graphs and O(V*E + V^2*log(V)) for weighted graphs.
12//!
13//! # Formula
14//!
15//! BC(v) = Σ (σ_st(v) / σ_st) for all s≠v≠t
16//!
17//! Where:
18//! - σ_st is the total number of shortest paths from s to t
19//! - σ_st(v) is the number of those paths passing through v
20//!
21//! # Example
22//!
23//! ```ignore
24//! use manifoldb_graph::analytics::{BetweennessCentrality, BetweennessCentralityConfig};
25//!
26//! let config = BetweennessCentralityConfig::default();
27//! let result = BetweennessCentrality::compute(&tx, &config)?;
28//!
29//! // Find the most central nodes (bridges/bottlenecks)
30//! for (node, score) in result.top_n(10) {
31//!     println!("Node {:?} has betweenness centrality {:.4}", node, score);
32//! }
33//! ```
34
35use std::collections::{HashMap, VecDeque};
36
37use manifoldb_core::EntityId;
38use manifoldb_storage::Transaction;
39
40use crate::index::AdjacencyIndex;
41use crate::store::{EdgeStore, GraphError, GraphResult, NodeStore};
42use crate::traversal::Direction;
43
44use super::pagerank::DEFAULT_MAX_GRAPH_NODES;
45
46/// Configuration for Betweenness Centrality computation.
47#[derive(Debug, Clone)]
48pub struct BetweennessCentralityConfig {
49    /// Whether to normalize centrality values to [0, 1].
50    /// Default: true
51    pub normalize: bool,
52
53    /// Direction of edges to follow.
54    /// Default: Both (treat as undirected)
55    pub direction: Direction,
56
57    /// Whether to include endpoints in the centrality calculation.
58    /// Default: false
59    pub include_endpoints: bool,
60
61    /// Maximum number of nodes allowed before returning an error.
62    /// Set to `None` to disable the check.
63    /// Default: 10,000,000 (10M nodes)
64    pub max_graph_nodes: Option<usize>,
65}
66
67impl Default for BetweennessCentralityConfig {
68    fn default() -> Self {
69        Self {
70            normalize: true,
71            direction: Direction::Both,
72            include_endpoints: false,
73            max_graph_nodes: Some(DEFAULT_MAX_GRAPH_NODES),
74        }
75    }
76}
77
78impl BetweennessCentralityConfig {
79    /// Create a new configuration with default values.
80    pub fn new() -> Self {
81        Self::default()
82    }
83
84    /// Set whether to normalize centrality values.
85    ///
86    /// When normalized, values are scaled to [0, 1] by dividing by
87    /// (n-1)*(n-2)/2 for undirected or (n-1)*(n-2) for directed graphs.
88    pub const fn with_normalize(mut self, normalize: bool) -> Self {
89        self.normalize = normalize;
90        self
91    }
92
93    /// Set the direction to follow edges.
94    ///
95    /// - `Outgoing`: Follow edges in their natural direction
96    /// - `Incoming`: Follow edges in reverse
97    /// - `Both`: Treat graph as undirected (default)
98    pub const fn with_direction(mut self, direction: Direction) -> Self {
99        self.direction = direction;
100        self
101    }
102
103    /// Set whether to include endpoints in centrality calculation.
104    ///
105    /// When true, source and target nodes also contribute to the
106    /// centrality of intermediate nodes.
107    pub const fn with_include_endpoints(mut self, include: bool) -> Self {
108        self.include_endpoints = include;
109        self
110    }
111
112    /// Set the maximum number of nodes allowed.
113    ///
114    /// If the graph has more nodes than this limit, the algorithm will
115    /// return a [`GraphError::GraphTooLarge`] error instead of attempting
116    /// to allocate potentially gigabytes of memory.
117    ///
118    /// Set to `None` to disable the check (use with caution).
119    ///
120    /// [`GraphError::GraphTooLarge`]: crate::store::GraphError::GraphTooLarge
121    pub const fn with_max_graph_nodes(mut self, limit: Option<usize>) -> Self {
122        self.max_graph_nodes = limit;
123        self
124    }
125}
126
127/// Result of Betweenness Centrality computation.
128#[derive(Debug, Clone)]
129pub struct CentralityResult {
130    /// Centrality scores for each node.
131    pub scores: HashMap<EntityId, f64>,
132
133    /// Whether scores are normalized.
134    pub normalized: bool,
135}
136
137impl CentralityResult {
138    /// Get the centrality score for a specific node.
139    pub fn score(&self, node: EntityId) -> Option<f64> {
140        self.scores.get(&node).copied()
141    }
142
143    /// Get nodes sorted by centrality score (descending).
144    pub fn sorted(&self) -> Vec<(EntityId, f64)> {
145        let mut pairs: Vec<_> = self.scores.iter().map(|(&id, &score)| (id, score)).collect();
146        pairs.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
147        pairs
148    }
149
150    /// Get the top N nodes by centrality score.
151    pub fn top_n(&self, n: usize) -> Vec<(EntityId, f64)> {
152        self.sorted().into_iter().take(n).collect()
153    }
154
155    /// Get the node with the highest centrality score.
156    pub fn max(&self) -> Option<(EntityId, f64)> {
157        self.scores
158            .iter()
159            .max_by(|a, b| a.1.partial_cmp(b.1).unwrap_or(std::cmp::Ordering::Equal))
160            .map(|(&id, &score)| (id, score))
161    }
162
163    /// Get the node with the lowest centrality score.
164    pub fn min(&self) -> Option<(EntityId, f64)> {
165        self.scores
166            .iter()
167            .min_by(|a, b| a.1.partial_cmp(b.1).unwrap_or(std::cmp::Ordering::Equal))
168            .map(|(&id, &score)| (id, score))
169    }
170
171    /// Get the mean centrality score.
172    pub fn mean(&self) -> f64 {
173        if self.scores.is_empty() {
174            return 0.0;
175        }
176        self.scores.values().sum::<f64>() / self.scores.len() as f64
177    }
178}
179
180/// Betweenness Centrality algorithm implementation.
181///
182/// Betweenness centrality quantifies the number of times a node acts as a
183/// bridge along the shortest path between two other nodes.
184pub struct BetweennessCentrality;
185
186impl BetweennessCentrality {
187    /// Compute betweenness centrality for all nodes in the graph.
188    ///
189    /// Uses Brandes' algorithm for efficient O(V*E) computation.
190    ///
191    /// # Arguments
192    ///
193    /// * `tx` - The transaction to use for graph access
194    /// * `config` - Configuration parameters for the algorithm
195    ///
196    /// # Returns
197    ///
198    /// A `CentralityResult` containing scores for all nodes.
199    pub fn compute<T: Transaction>(
200        tx: &T,
201        config: &BetweennessCentralityConfig,
202    ) -> GraphResult<CentralityResult> {
203        // Check graph size before allocating large data structures
204        if let Some(limit) = config.max_graph_nodes {
205            let node_count = NodeStore::count(tx)?;
206            if node_count > limit {
207                return Err(GraphError::GraphTooLarge { node_count, limit });
208            }
209        }
210
211        // Collect all nodes
212        let mut nodes: Vec<EntityId> = Vec::new();
213        NodeStore::for_each(tx, |entity| {
214            nodes.push(entity.id);
215            true
216        })?;
217
218        let n = nodes.len();
219        if n == 0 {
220            return Ok(CentralityResult { scores: HashMap::new(), normalized: config.normalize });
221        }
222
223        // Build node index for fast lookup - pre-allocate with known size
224        let mut node_index: HashMap<EntityId, usize> = HashMap::with_capacity(n);
225        for (i, &id) in nodes.iter().enumerate() {
226            node_index.insert(id, i);
227        }
228
229        // Build adjacency lists - pre-allocate with typical degree estimate
230        const AVG_DEGREE_ESTIMATE: usize = 8;
231        let mut neighbors: Vec<Vec<usize>> =
232            (0..n).map(|_| Vec::with_capacity(AVG_DEGREE_ESTIMATE)).collect();
233        for (i, &node) in nodes.iter().enumerate() {
234            let neighbor_ids = Self::get_neighbors(tx, node, config.direction)?;
235            for neighbor_id in neighbor_ids {
236                if let Some(&j) = node_index.get(&neighbor_id) {
237                    neighbors[i].push(j);
238                }
239            }
240        }
241
242        // Initialize centrality scores
243        let mut centrality: Vec<f64> = vec![0.0; n];
244
245        // Brandes' algorithm: process each node as source
246        for s in 0..n {
247            // Single-source shortest-paths problem
248            let mut stack: Vec<usize> = Vec::new();
249            let mut predecessors: Vec<Vec<usize>> = vec![Vec::new(); n];
250            let mut sigma: Vec<f64> = vec![0.0; n]; // Number of shortest paths
251            sigma[s] = 1.0;
252            let mut dist: Vec<i64> = vec![-1; n]; // Distance from source
253            dist[s] = 0;
254
255            // BFS
256            let mut queue: VecDeque<usize> = VecDeque::new();
257            queue.push_back(s);
258
259            while let Some(v) = queue.pop_front() {
260                stack.push(v);
261
262                for &w in &neighbors[v] {
263                    // Path discovery
264                    if dist[w] < 0 {
265                        dist[w] = dist[v] + 1;
266                        queue.push_back(w);
267                    }
268
269                    // Path counting
270                    if dist[w] == dist[v] + 1 {
271                        sigma[w] += sigma[v];
272                        predecessors[w].push(v);
273                    }
274                }
275            }
276
277            // Accumulation phase
278            let mut delta: Vec<f64> = vec![0.0; n];
279
280            while let Some(w) = stack.pop() {
281                for &v in &predecessors[w] {
282                    delta[v] += (sigma[v] / sigma[w]) * (1.0 + delta[w]);
283                }
284                if w != s {
285                    centrality[w] += delta[w];
286                }
287            }
288        }
289
290        // For undirected graphs, divide by 2 (each path counted twice)
291        if config.direction == Direction::Both {
292            for score in &mut centrality {
293                *score /= 2.0;
294            }
295        }
296
297        // Normalize if requested
298        if config.normalize && n > 2 {
299            let normalization_factor = if config.direction == Direction::Both {
300                // Undirected: (n-1)(n-2)/2
301                2.0 / ((n - 1) * (n - 2)) as f64
302            } else {
303                // Directed: (n-1)(n-2)
304                1.0 / ((n - 1) * (n - 2)) as f64
305            };
306
307            for score in &mut centrality {
308                *score *= normalization_factor;
309            }
310        }
311
312        // Build result map
313        let scores: HashMap<EntityId, f64> = nodes.into_iter().zip(centrality).collect();
314
315        Ok(CentralityResult { scores, normalized: config.normalize })
316    }
317
318    /// Compute betweenness centrality for a subset of nodes.
319    ///
320    /// Only considers shortest paths within the specified subgraph.
321    ///
322    /// # Arguments
323    ///
324    /// * `tx` - The transaction to use for graph access
325    /// * `nodes` - The nodes to include in the computation
326    /// * `config` - Configuration parameters for the algorithm
327    pub fn compute_for_nodes<T: Transaction>(
328        tx: &T,
329        nodes: &[EntityId],
330        config: &BetweennessCentralityConfig,
331    ) -> GraphResult<CentralityResult> {
332        let n = nodes.len();
333        if n == 0 {
334            return Ok(CentralityResult { scores: HashMap::new(), normalized: config.normalize });
335        }
336
337        // Build node index and set for fast lookup - pre-allocate with known size
338        let mut node_set: std::collections::HashSet<EntityId> =
339            std::collections::HashSet::with_capacity(n);
340        for &id in nodes {
341            node_set.insert(id);
342        }
343        let mut node_index: HashMap<EntityId, usize> = HashMap::with_capacity(n);
344        for (i, &id) in nodes.iter().enumerate() {
345            node_index.insert(id, i);
346        }
347
348        // Build adjacency lists (only including edges within the subgraph)
349        // Pre-allocate with typical degree estimate
350        const AVG_DEGREE_ESTIMATE: usize = 8;
351        let mut neighbors: Vec<Vec<usize>> =
352            (0..n).map(|_| Vec::with_capacity(AVG_DEGREE_ESTIMATE)).collect();
353        for (i, &node) in nodes.iter().enumerate() {
354            let neighbor_ids = Self::get_neighbors(tx, node, config.direction)?;
355            for neighbor_id in neighbor_ids {
356                if node_set.contains(&neighbor_id) {
357                    if let Some(&j) = node_index.get(&neighbor_id) {
358                        neighbors[i].push(j);
359                    }
360                }
361            }
362        }
363
364        // Initialize centrality scores
365        let mut centrality: Vec<f64> = vec![0.0; n];
366
367        // Brandes' algorithm
368        for s in 0..n {
369            let mut stack: Vec<usize> = Vec::new();
370            let mut predecessors: Vec<Vec<usize>> = vec![Vec::new(); n];
371            let mut sigma: Vec<f64> = vec![0.0; n];
372            sigma[s] = 1.0;
373            let mut dist: Vec<i64> = vec![-1; n];
374            dist[s] = 0;
375
376            let mut queue: VecDeque<usize> = VecDeque::new();
377            queue.push_back(s);
378
379            while let Some(v) = queue.pop_front() {
380                stack.push(v);
381
382                for &w in &neighbors[v] {
383                    if dist[w] < 0 {
384                        dist[w] = dist[v] + 1;
385                        queue.push_back(w);
386                    }
387
388                    if dist[w] == dist[v] + 1 {
389                        sigma[w] += sigma[v];
390                        predecessors[w].push(v);
391                    }
392                }
393            }
394
395            let mut delta: Vec<f64> = vec![0.0; n];
396
397            while let Some(w) = stack.pop() {
398                for &v in &predecessors[w] {
399                    delta[v] += (sigma[v] / sigma[w]) * (1.0 + delta[w]);
400                }
401                if w != s {
402                    centrality[w] += delta[w];
403                }
404            }
405        }
406
407        // For undirected graphs, divide by 2
408        if config.direction == Direction::Both {
409            for score in &mut centrality {
410                *score /= 2.0;
411            }
412        }
413
414        // Normalize if requested
415        if config.normalize && n > 2 {
416            let normalization_factor = if config.direction == Direction::Both {
417                2.0 / ((n - 1) * (n - 2)) as f64
418            } else {
419                1.0 / ((n - 1) * (n - 2)) as f64
420            };
421
422            for score in &mut centrality {
423                *score *= normalization_factor;
424            }
425        }
426
427        // Build result map
428        let scores: HashMap<EntityId, f64> = nodes.iter().copied().zip(centrality).collect();
429
430        Ok(CentralityResult { scores, normalized: config.normalize })
431    }
432
433    /// Get neighbors of a node based on direction.
434    fn get_neighbors<T: Transaction>(
435        tx: &T,
436        node: EntityId,
437        direction: Direction,
438    ) -> GraphResult<Vec<EntityId>> {
439        // Pre-allocate for typical node degree
440        const INITIAL_NEIGHBORS_CAPACITY: usize = 16;
441
442        let mut neighbors = Vec::with_capacity(INITIAL_NEIGHBORS_CAPACITY);
443
444        if direction.includes_outgoing() {
445            let edges = AdjacencyIndex::get_outgoing_edge_ids(tx, node)?;
446            for edge_id in edges {
447                if let Some(edge) = EdgeStore::get(tx, edge_id)? {
448                    neighbors.push(edge.target);
449                }
450            }
451        }
452
453        if direction.includes_incoming() {
454            let edges = AdjacencyIndex::get_incoming_edge_ids(tx, node)?;
455            for edge_id in edges {
456                if let Some(edge) = EdgeStore::get(tx, edge_id)? {
457                    neighbors.push(edge.source);
458                }
459            }
460        }
461
462        Ok(neighbors)
463    }
464}
465
466#[cfg(test)]
467mod tests {
468    use super::*;
469
470    #[test]
471    fn config_defaults() {
472        let config = BetweennessCentralityConfig::default();
473        assert!(config.normalize);
474        assert_eq!(config.direction, Direction::Both);
475        assert!(!config.include_endpoints);
476    }
477
478    #[test]
479    fn config_builder() {
480        let config = BetweennessCentralityConfig::new()
481            .with_normalize(false)
482            .with_direction(Direction::Outgoing)
483            .with_include_endpoints(true);
484
485        assert!(!config.normalize);
486        assert_eq!(config.direction, Direction::Outgoing);
487        assert!(config.include_endpoints);
488    }
489
490    #[test]
491    fn result_empty() {
492        let result = CentralityResult { scores: HashMap::new(), normalized: true };
493
494        assert!(result.score(EntityId::new(1)).is_none());
495        assert!(result.sorted().is_empty());
496        assert!(result.top_n(10).is_empty());
497        assert!(result.max().is_none());
498        assert!(result.min().is_none());
499        assert!((result.mean() - 0.0).abs() < f64::EPSILON);
500    }
501
502    #[test]
503    fn result_sorted() {
504        let mut scores = HashMap::new();
505        scores.insert(EntityId::new(1), 0.3);
506        scores.insert(EntityId::new(2), 0.5);
507        scores.insert(EntityId::new(3), 0.2);
508
509        let result = CentralityResult { scores, normalized: true };
510
511        let sorted = result.sorted();
512        assert_eq!(sorted.len(), 3);
513        assert_eq!(sorted[0].0, EntityId::new(2)); // highest
514        assert_eq!(sorted[1].0, EntityId::new(1));
515        assert_eq!(sorted[2].0, EntityId::new(3)); // lowest
516    }
517
518    #[test]
519    fn result_mean() {
520        let mut scores = HashMap::new();
521        scores.insert(EntityId::new(1), 0.3);
522        scores.insert(EntityId::new(2), 0.6);
523        scores.insert(EntityId::new(3), 0.9);
524
525        let result = CentralityResult { scores, normalized: true };
526
527        assert!((result.mean() - 0.6).abs() < f64::EPSILON);
528    }
529}