manifoldb_graph/analytics/
centrality.rs1use std::collections::{HashMap, VecDeque};
36
37use manifoldb_core::EntityId;
38use manifoldb_storage::Transaction;
39
40use crate::index::AdjacencyIndex;
41use crate::store::{EdgeStore, GraphError, GraphResult, NodeStore};
42use crate::traversal::Direction;
43
44use super::pagerank::DEFAULT_MAX_GRAPH_NODES;
45
46#[derive(Debug, Clone)]
48pub struct BetweennessCentralityConfig {
49 pub normalize: bool,
52
53 pub direction: Direction,
56
57 pub include_endpoints: bool,
60
61 pub max_graph_nodes: Option<usize>,
65}
66
67impl Default for BetweennessCentralityConfig {
68 fn default() -> Self {
69 Self {
70 normalize: true,
71 direction: Direction::Both,
72 include_endpoints: false,
73 max_graph_nodes: Some(DEFAULT_MAX_GRAPH_NODES),
74 }
75 }
76}
77
78impl BetweennessCentralityConfig {
79 pub fn new() -> Self {
81 Self::default()
82 }
83
84 pub const fn with_normalize(mut self, normalize: bool) -> Self {
89 self.normalize = normalize;
90 self
91 }
92
93 pub const fn with_direction(mut self, direction: Direction) -> Self {
99 self.direction = direction;
100 self
101 }
102
103 pub const fn with_include_endpoints(mut self, include: bool) -> Self {
108 self.include_endpoints = include;
109 self
110 }
111
112 pub const fn with_max_graph_nodes(mut self, limit: Option<usize>) -> Self {
122 self.max_graph_nodes = limit;
123 self
124 }
125}
126
127#[derive(Debug, Clone)]
129pub struct CentralityResult {
130 pub scores: HashMap<EntityId, f64>,
132
133 pub normalized: bool,
135}
136
137impl CentralityResult {
138 pub fn score(&self, node: EntityId) -> Option<f64> {
140 self.scores.get(&node).copied()
141 }
142
143 pub fn sorted(&self) -> Vec<(EntityId, f64)> {
145 let mut pairs: Vec<_> = self.scores.iter().map(|(&id, &score)| (id, score)).collect();
146 pairs.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
147 pairs
148 }
149
150 pub fn top_n(&self, n: usize) -> Vec<(EntityId, f64)> {
152 self.sorted().into_iter().take(n).collect()
153 }
154
155 pub fn max(&self) -> Option<(EntityId, f64)> {
157 self.scores
158 .iter()
159 .max_by(|a, b| a.1.partial_cmp(b.1).unwrap_or(std::cmp::Ordering::Equal))
160 .map(|(&id, &score)| (id, score))
161 }
162
163 pub fn min(&self) -> Option<(EntityId, f64)> {
165 self.scores
166 .iter()
167 .min_by(|a, b| a.1.partial_cmp(b.1).unwrap_or(std::cmp::Ordering::Equal))
168 .map(|(&id, &score)| (id, score))
169 }
170
171 pub fn mean(&self) -> f64 {
173 if self.scores.is_empty() {
174 return 0.0;
175 }
176 self.scores.values().sum::<f64>() / self.scores.len() as f64
177 }
178}
179
180pub struct BetweennessCentrality;
185
186impl BetweennessCentrality {
187 pub fn compute<T: Transaction>(
200 tx: &T,
201 config: &BetweennessCentralityConfig,
202 ) -> GraphResult<CentralityResult> {
203 if let Some(limit) = config.max_graph_nodes {
205 let node_count = NodeStore::count(tx)?;
206 if node_count > limit {
207 return Err(GraphError::GraphTooLarge { node_count, limit });
208 }
209 }
210
211 let mut nodes: Vec<EntityId> = Vec::new();
213 NodeStore::for_each(tx, |entity| {
214 nodes.push(entity.id);
215 true
216 })?;
217
218 let n = nodes.len();
219 if n == 0 {
220 return Ok(CentralityResult { scores: HashMap::new(), normalized: config.normalize });
221 }
222
223 let mut node_index: HashMap<EntityId, usize> = HashMap::with_capacity(n);
225 for (i, &id) in nodes.iter().enumerate() {
226 node_index.insert(id, i);
227 }
228
229 const AVG_DEGREE_ESTIMATE: usize = 8;
231 let mut neighbors: Vec<Vec<usize>> =
232 (0..n).map(|_| Vec::with_capacity(AVG_DEGREE_ESTIMATE)).collect();
233 for (i, &node) in nodes.iter().enumerate() {
234 let neighbor_ids = Self::get_neighbors(tx, node, config.direction)?;
235 for neighbor_id in neighbor_ids {
236 if let Some(&j) = node_index.get(&neighbor_id) {
237 neighbors[i].push(j);
238 }
239 }
240 }
241
242 let mut centrality: Vec<f64> = vec![0.0; n];
244
245 for s in 0..n {
247 let mut stack: Vec<usize> = Vec::new();
249 let mut predecessors: Vec<Vec<usize>> = vec![Vec::new(); n];
250 let mut sigma: Vec<f64> = vec![0.0; n]; sigma[s] = 1.0;
252 let mut dist: Vec<i64> = vec![-1; n]; dist[s] = 0;
254
255 let mut queue: VecDeque<usize> = VecDeque::new();
257 queue.push_back(s);
258
259 while let Some(v) = queue.pop_front() {
260 stack.push(v);
261
262 for &w in &neighbors[v] {
263 if dist[w] < 0 {
265 dist[w] = dist[v] + 1;
266 queue.push_back(w);
267 }
268
269 if dist[w] == dist[v] + 1 {
271 sigma[w] += sigma[v];
272 predecessors[w].push(v);
273 }
274 }
275 }
276
277 let mut delta: Vec<f64> = vec![0.0; n];
279
280 while let Some(w) = stack.pop() {
281 for &v in &predecessors[w] {
282 delta[v] += (sigma[v] / sigma[w]) * (1.0 + delta[w]);
283 }
284 if w != s {
285 centrality[w] += delta[w];
286 }
287 }
288 }
289
290 if config.direction == Direction::Both {
292 for score in &mut centrality {
293 *score /= 2.0;
294 }
295 }
296
297 if config.normalize && n > 2 {
299 let normalization_factor = if config.direction == Direction::Both {
300 2.0 / ((n - 1) * (n - 2)) as f64
302 } else {
303 1.0 / ((n - 1) * (n - 2)) as f64
305 };
306
307 for score in &mut centrality {
308 *score *= normalization_factor;
309 }
310 }
311
312 let scores: HashMap<EntityId, f64> = nodes.into_iter().zip(centrality).collect();
314
315 Ok(CentralityResult { scores, normalized: config.normalize })
316 }
317
318 pub fn compute_for_nodes<T: Transaction>(
328 tx: &T,
329 nodes: &[EntityId],
330 config: &BetweennessCentralityConfig,
331 ) -> GraphResult<CentralityResult> {
332 let n = nodes.len();
333 if n == 0 {
334 return Ok(CentralityResult { scores: HashMap::new(), normalized: config.normalize });
335 }
336
337 let mut node_set: std::collections::HashSet<EntityId> =
339 std::collections::HashSet::with_capacity(n);
340 for &id in nodes {
341 node_set.insert(id);
342 }
343 let mut node_index: HashMap<EntityId, usize> = HashMap::with_capacity(n);
344 for (i, &id) in nodes.iter().enumerate() {
345 node_index.insert(id, i);
346 }
347
348 const AVG_DEGREE_ESTIMATE: usize = 8;
351 let mut neighbors: Vec<Vec<usize>> =
352 (0..n).map(|_| Vec::with_capacity(AVG_DEGREE_ESTIMATE)).collect();
353 for (i, &node) in nodes.iter().enumerate() {
354 let neighbor_ids = Self::get_neighbors(tx, node, config.direction)?;
355 for neighbor_id in neighbor_ids {
356 if node_set.contains(&neighbor_id) {
357 if let Some(&j) = node_index.get(&neighbor_id) {
358 neighbors[i].push(j);
359 }
360 }
361 }
362 }
363
364 let mut centrality: Vec<f64> = vec![0.0; n];
366
367 for s in 0..n {
369 let mut stack: Vec<usize> = Vec::new();
370 let mut predecessors: Vec<Vec<usize>> = vec![Vec::new(); n];
371 let mut sigma: Vec<f64> = vec![0.0; n];
372 sigma[s] = 1.0;
373 let mut dist: Vec<i64> = vec![-1; n];
374 dist[s] = 0;
375
376 let mut queue: VecDeque<usize> = VecDeque::new();
377 queue.push_back(s);
378
379 while let Some(v) = queue.pop_front() {
380 stack.push(v);
381
382 for &w in &neighbors[v] {
383 if dist[w] < 0 {
384 dist[w] = dist[v] + 1;
385 queue.push_back(w);
386 }
387
388 if dist[w] == dist[v] + 1 {
389 sigma[w] += sigma[v];
390 predecessors[w].push(v);
391 }
392 }
393 }
394
395 let mut delta: Vec<f64> = vec![0.0; n];
396
397 while let Some(w) = stack.pop() {
398 for &v in &predecessors[w] {
399 delta[v] += (sigma[v] / sigma[w]) * (1.0 + delta[w]);
400 }
401 if w != s {
402 centrality[w] += delta[w];
403 }
404 }
405 }
406
407 if config.direction == Direction::Both {
409 for score in &mut centrality {
410 *score /= 2.0;
411 }
412 }
413
414 if config.normalize && n > 2 {
416 let normalization_factor = if config.direction == Direction::Both {
417 2.0 / ((n - 1) * (n - 2)) as f64
418 } else {
419 1.0 / ((n - 1) * (n - 2)) as f64
420 };
421
422 for score in &mut centrality {
423 *score *= normalization_factor;
424 }
425 }
426
427 let scores: HashMap<EntityId, f64> = nodes.iter().copied().zip(centrality).collect();
429
430 Ok(CentralityResult { scores, normalized: config.normalize })
431 }
432
433 fn get_neighbors<T: Transaction>(
435 tx: &T,
436 node: EntityId,
437 direction: Direction,
438 ) -> GraphResult<Vec<EntityId>> {
439 const INITIAL_NEIGHBORS_CAPACITY: usize = 16;
441
442 let mut neighbors = Vec::with_capacity(INITIAL_NEIGHBORS_CAPACITY);
443
444 if direction.includes_outgoing() {
445 let edges = AdjacencyIndex::get_outgoing_edge_ids(tx, node)?;
446 for edge_id in edges {
447 if let Some(edge) = EdgeStore::get(tx, edge_id)? {
448 neighbors.push(edge.target);
449 }
450 }
451 }
452
453 if direction.includes_incoming() {
454 let edges = AdjacencyIndex::get_incoming_edge_ids(tx, node)?;
455 for edge_id in edges {
456 if let Some(edge) = EdgeStore::get(tx, edge_id)? {
457 neighbors.push(edge.source);
458 }
459 }
460 }
461
462 Ok(neighbors)
463 }
464}
465
466#[cfg(test)]
467mod tests {
468 use super::*;
469
470 #[test]
471 fn config_defaults() {
472 let config = BetweennessCentralityConfig::default();
473 assert!(config.normalize);
474 assert_eq!(config.direction, Direction::Both);
475 assert!(!config.include_endpoints);
476 }
477
478 #[test]
479 fn config_builder() {
480 let config = BetweennessCentralityConfig::new()
481 .with_normalize(false)
482 .with_direction(Direction::Outgoing)
483 .with_include_endpoints(true);
484
485 assert!(!config.normalize);
486 assert_eq!(config.direction, Direction::Outgoing);
487 assert!(config.include_endpoints);
488 }
489
490 #[test]
491 fn result_empty() {
492 let result = CentralityResult { scores: HashMap::new(), normalized: true };
493
494 assert!(result.score(EntityId::new(1)).is_none());
495 assert!(result.sorted().is_empty());
496 assert!(result.top_n(10).is_empty());
497 assert!(result.max().is_none());
498 assert!(result.min().is_none());
499 assert!((result.mean() - 0.0).abs() < f64::EPSILON);
500 }
501
502 #[test]
503 fn result_sorted() {
504 let mut scores = HashMap::new();
505 scores.insert(EntityId::new(1), 0.3);
506 scores.insert(EntityId::new(2), 0.5);
507 scores.insert(EntityId::new(3), 0.2);
508
509 let result = CentralityResult { scores, normalized: true };
510
511 let sorted = result.sorted();
512 assert_eq!(sorted.len(), 3);
513 assert_eq!(sorted[0].0, EntityId::new(2)); assert_eq!(sorted[1].0, EntityId::new(1));
515 assert_eq!(sorted[2].0, EntityId::new(3)); }
517
518 #[test]
519 fn result_mean() {
520 let mut scores = HashMap::new();
521 scores.insert(EntityId::new(1), 0.3);
522 scores.insert(EntityId::new(2), 0.6);
523 scores.insert(EntityId::new(3), 0.9);
524
525 let result = CentralityResult { scores, normalized: true };
526
527 assert!((result.mean() - 0.6).abs() < f64::EPSILON);
528 }
529}