Skip to main content

reddb_server/storage/engine/algorithms/
mod.rs

1//! Graph Algorithms for RedDB
2//!
3//! High-performance graph algorithms optimized for attack path analysis:
4//! - PageRank: Identify critical nodes
5//! - Connected Components: Find isolated network segments
6//! - Betweenness Centrality: Find chokepoints
7//! - Community Detection: Cluster related assets
8//! - Cycle Detection: Find lateral movement loops
9//!
10//! # Algorithm Complexity
11//!
12//! | Algorithm              | Time Complexity    | Space Complexity | Notes                          |
13//! |------------------------|--------------------|------------------|--------------------------------|
14//! | PageRank               | O(V + E) per iter  | O(V)             | Converges in ~20-50 iterations |
15//! | Connected Components   | O(V + E)           | O(V)             | Union-Find with path compress  |
16//! | Betweenness Centrality | O(V × E)           | O(V + E)         | Brandes' algorithm             |
17//! | Community Detection    | O(V + E) per iter  | O(V)             | Label propagation, ~5-10 iters |
18//! | Cycle Detection        | O(V + E)           | O(V + cycles)    | DFS with rotation dedup        |
19//! | BFS Path Finding       | O(V + E)           | O(V)             | Single-source shortest path    |
20//! | Dijkstra (weighted)    | O((V+E) log V)     | O(V)             | Priority queue based           |
21//! | K-Shortest Paths       | O(K × V × E)       | O(K × V)         | Yen's algorithm                |
22//!
23//! Where V = vertices (nodes), E = edges.
24//!
25//! # Performance Benchmarks
26//!
27//! On a graph with 1M edges (typical enterprise network):
28//! - Graph creation: ~2 seconds
29//! - PageRank: ~500ms (50 iterations)
30//! - Connected Components: ~100ms
31//! - Communities: ~300ms (10 iterations)
32//!
33//! On a graph with 100K edges:
34//! - Betweenness Centrality: ~5 seconds (O(V×E))
35//!
36//! On a graph with 10K edges:
37//! - Cycle Detection: ~50ms
38//!
39//! # Example
40//!
41//! ```ignore
42//! use reddb::storage::engine::algorithms::{PageRank, ConnectedComponents};
43//!
44//! let pr = PageRank::new(&graph).run();
45//! let components = ConnectedComponents::new(&graph).find();
46//! ```
47
48// Algorithm submodules
49mod centrality;
50mod community;
51mod components;
52mod cycles;
53mod pagerank;
54mod structural;
55
56// Re-export centrality algorithms
57pub use centrality::{
58    BetweennessCentrality, BetweennessResult, ClosenessCentrality, ClosenessResult,
59    DegreeCentrality, DegreeCentralityResult, EigenvectorCentrality, EigenvectorResult,
60};
61
62// Re-export community detection algorithms
63pub use community::{CommunitiesResult, Community, LabelPropagation, Louvain, LouvainResult};
64
65// Re-export connected components
66pub use components::{Component, ComponentsResult, ConnectedComponents};
67
68// Re-export cycle detection
69pub use cycles::{Cycle, CycleDetector, CyclesResult};
70
71// Re-export PageRank algorithms
72pub use pagerank::{PageRank, PageRankResult, PersonalizedPageRank};
73
74// Re-export structural algorithms
75pub use structural::{
76    ClusteringCoefficient, ClusteringResult, HITSResult, SCCResult, StronglyConnectedComponents,
77    TriangleCounting, TriangleResult, WCCResult, WeaklyConnectedComponents, HITS,
78};
79
80// ============================================================================
81// Tests
82// ============================================================================
83
84#[cfg(test)]
85mod tests {
86    use super::*;
87    use crate::storage::engine::graph_store::GraphStore;
88
89    fn create_test_graph() -> GraphStore {
90        let graph = GraphStore::new();
91
92        // Create a diamond graph: A -> B, A -> C, B -> D, C -> D
93        let _ = graph.add_node_with_label("A", "Node A", "host");
94        let _ = graph.add_node_with_label("B", "Node B", "host");
95        let _ = graph.add_node_with_label("C", "Node C", "host");
96        let _ = graph.add_node_with_label("D", "Node D", "host");
97
98        let _ = graph.add_edge_with_label("A", "B", "connects_to", 1.0);
99        let _ = graph.add_edge_with_label("A", "C", "connects_to", 1.0);
100        let _ = graph.add_edge_with_label("B", "D", "connects_to", 1.0);
101        let _ = graph.add_edge_with_label("C", "D", "connects_to", 1.0);
102
103        graph
104    }
105
106    fn create_cycle_graph() -> GraphStore {
107        let graph = GraphStore::new();
108
109        // Create a cycle: A -> B -> C -> A
110        let _ = graph.add_node_with_label("A", "Node A", "host");
111        let _ = graph.add_node_with_label("B", "Node B", "host");
112        let _ = graph.add_node_with_label("C", "Node C", "host");
113
114        let _ = graph.add_edge_with_label("A", "B", "connects_to", 1.0);
115        let _ = graph.add_edge_with_label("B", "C", "connects_to", 1.0);
116        let _ = graph.add_edge_with_label("C", "A", "connects_to", 1.0);
117
118        graph
119    }
120
121    fn create_disconnected_graph() -> GraphStore {
122        let graph = GraphStore::new();
123
124        // Component 1: A -> B
125        let _ = graph.add_node_with_label("A", "Node A", "host");
126        let _ = graph.add_node_with_label("B", "Node B", "host");
127        let _ = graph.add_edge_with_label("A", "B", "connects_to", 1.0);
128
129        // Component 2: C -> D -> E
130        let _ = graph.add_node_with_label("C", "Node C", "host");
131        let _ = graph.add_node_with_label("D", "Node D", "host");
132        let _ = graph.add_node_with_label("E", "Node E", "host");
133        let _ = graph.add_edge_with_label("C", "D", "connects_to", 1.0);
134        let _ = graph.add_edge_with_label("D", "E", "connects_to", 1.0);
135
136        // Component 3: F (isolated)
137        let _ = graph.add_node_with_label("F", "Node F", "host");
138
139        graph
140    }
141
142    // PageRank tests
143    #[test]
144    fn test_pagerank_empty_graph() {
145        let graph = GraphStore::new();
146        let result = PageRank::new().run(&graph);
147        assert!(result.scores.is_empty());
148        assert!(result.converged);
149    }
150
151    #[test]
152    fn test_pagerank_single_node() {
153        let graph = GraphStore::new();
154        let _ = graph.add_node_with_label("A", "Node A", "host");
155
156        let result = PageRank::new().run(&graph);
157        assert_eq!(result.scores.len(), 1);
158        assert!((result.scores["A"] - 1.0).abs() < 0.01);
159    }
160
161    #[test]
162    fn test_pagerank_diamond() {
163        let graph = create_test_graph();
164        let result = PageRank::new().run(&graph);
165
166        // D should have highest score (most incoming)
167        let top = result.top(4);
168        assert_eq!(top[0].0, "D");
169        assert!(result.converged);
170    }
171
172    #[test]
173    fn test_pagerank_top_n() {
174        let graph = create_test_graph();
175        let result = PageRank::new().run(&graph);
176
177        let top2 = result.top(2);
178        assert_eq!(top2.len(), 2);
179    }
180
181    // Connected Components tests
182    #[test]
183    fn test_components_single_component() {
184        let graph = create_test_graph();
185        let result = ConnectedComponents::find(&graph);
186
187        assert_eq!(result.count, 1);
188        assert_eq!(result.components[0].size, 4);
189    }
190
191    #[test]
192    fn test_components_disconnected() {
193        let graph = create_disconnected_graph();
194        let result = ConnectedComponents::find(&graph);
195
196        assert_eq!(result.count, 3);
197        // Largest component has 3 nodes
198        assert_eq!(result.largest().unwrap().size, 3);
199    }
200
201    #[test]
202    fn test_components_filter_by_size() {
203        let graph = create_disconnected_graph();
204        let result = ConnectedComponents::find(&graph);
205
206        let large = result.filter_by_size(2);
207        assert_eq!(large.len(), 2); // Only components with 2+ nodes
208    }
209
210    #[test]
211    fn test_components_component_of() {
212        let graph = create_disconnected_graph();
213        let result = ConnectedComponents::find(&graph);
214
215        let comp = result.component_of("D").unwrap();
216        assert!(comp.nodes.contains(&"C".to_string()));
217        assert!(comp.nodes.contains(&"E".to_string()));
218    }
219
220    // Betweenness Centrality tests
221    #[test]
222    fn test_betweenness_empty() {
223        let graph = GraphStore::new();
224        let result = BetweennessCentrality::compute(&graph, false);
225        assert!(result.scores.is_empty());
226    }
227
228    #[test]
229    fn test_betweenness_diamond() {
230        let graph = create_test_graph();
231        let result = BetweennessCentrality::compute(&graph, false);
232
233        // B and C should have similar betweenness
234        // A is source, D is sink - they have 0 betweenness
235        assert!(result.score("A").unwrap() < result.score("B").unwrap());
236    }
237
238    #[test]
239    fn test_betweenness_normalized() {
240        let graph = create_test_graph();
241        let result = BetweennessCentrality::compute(&graph, true);
242
243        // Normalized scores should be between 0 and 1
244        for score in result.scores.values() {
245            assert!(*score >= 0.0 && *score <= 1.0);
246        }
247    }
248
249    // Community Detection tests
250    #[test]
251    fn test_communities_connected() {
252        let graph = create_test_graph();
253        let result = LabelPropagation::new().run(&graph);
254
255        // Fully connected graph should form 1 community
256        assert!(result.communities.len() <= 2);
257    }
258
259    #[test]
260    fn test_communities_disconnected() {
261        let graph = create_disconnected_graph();
262        let result = LabelPropagation::new().run(&graph);
263
264        // Should find at least 3 communities
265        assert!(result.communities.len() >= 3);
266    }
267
268    #[test]
269    fn test_communities_convergence() {
270        let graph = create_test_graph();
271        let result = LabelPropagation::new().max_iterations(50).run(&graph);
272
273        assert!(result.converged || result.iterations == 50);
274    }
275
276    // Cycle Detection tests
277    #[test]
278    fn test_cycles_no_cycle() {
279        let graph = create_test_graph(); // Diamond has no cycles
280        let result = CycleDetector::new().find(&graph);
281
282        assert!(result.cycles.is_empty());
283    }
284
285    #[test]
286    fn test_cycles_simple_cycle() {
287        let graph = create_cycle_graph();
288        let result = CycleDetector::new().find(&graph);
289
290        assert!(!result.cycles.is_empty());
291        assert_eq!(result.cycles[0].length, 3); // A -> B -> C -> A
292    }
293
294    #[test]
295    fn test_cycles_max_length() {
296        let graph = create_cycle_graph();
297        let result = CycleDetector::new().max_length(2).find(&graph);
298
299        // Cycle of length 3 should not be found with max_length=2
300        assert!(result.cycles.is_empty());
301    }
302
303    #[test]
304    fn test_cycles_max_cycles_limit() {
305        let graph = create_cycle_graph();
306        let result = CycleDetector::new().max_cycles(0).find(&graph);
307
308        assert!(result.cycles.is_empty());
309        assert!(result.limit_reached);
310    }
311
312    // ========================================================================
313    // Performance Benchmarks (1M edges < 5 seconds target)
314    // ========================================================================
315
316    /// Generate a large graph for performance testing
317    fn create_large_graph(node_count: usize, edge_multiplier: usize) -> GraphStore {
318        let graph = GraphStore::new();
319
320        // Create nodes
321        for i in 0..node_count {
322            let node_id = format!("n{}", i);
323            let label = format!("Node {}", i);
324            let _ = graph.add_node_with_label(&node_id, &label, "host");
325        }
326
327        // Create edges (scale-free network style - some nodes have many connections)
328        let mut edge_count = 0;
329        for i in 0..node_count {
330            // Each node connects to edge_multiplier random nodes
331            for j in 1..=edge_multiplier {
332                let target = (i + j * 17) % node_count; // Pseudo-random but deterministic
333                if target != i {
334                    let source_id = format!("n{}", i);
335                    let target_id = format!("n{}", target);
336                    let _ = graph.add_edge_with_label(&source_id, &target_id, "connects_to", 1.0);
337                    edge_count += 1;
338                }
339            }
340        }
341
342        eprintln!(
343            "Created graph with {} nodes and {} edges",
344            node_count, edge_count
345        );
346        graph
347    }
348
349    /// Benchmark: 1M edges graph creation should be fast
350    #[test]
351    #[ignore] // Run with: cargo test bench_graph_creation --release -- --ignored --nocapture
352    fn bench_graph_creation_1m_edges() {
353        use std::time::Instant;
354
355        // Target: 1M edges = ~100K nodes with 10 edges each
356        let node_count = 100_000;
357        let edge_multiplier = 10;
358
359        let start = Instant::now();
360        let graph = create_large_graph(node_count, edge_multiplier);
361        let elapsed = start.elapsed();
362
363        let stats = graph.stats();
364        eprintln!("Graph creation: {:?}", elapsed);
365        eprintln!("Nodes: {}, Edges: {}", stats.node_count, stats.edge_count);
366
367        // Target: < 5 seconds
368        assert!(elapsed.as_secs() < 5, "Graph creation took {:?}", elapsed);
369    }
370
371    /// Benchmark: PageRank on 1M edges
372    #[test]
373    #[ignore]
374    fn bench_pagerank_1m_edges() {
375        use std::time::Instant;
376
377        let graph = create_large_graph(100_000, 10);
378
379        let start = Instant::now();
380        let result = PageRank::new().run(&graph);
381        let elapsed = start.elapsed();
382
383        eprintln!("PageRank: {:?} ({} iterations)", elapsed, result.iterations);
384        eprintln!("Top 5 nodes by rank:");
385        let mut scores: Vec<_> = result.scores.iter().collect();
386        scores.sort_by(|a, b| b.1.partial_cmp(a.1).unwrap());
387        for (node, score) in scores.iter().take(5) {
388            eprintln!("  {}: {:.6}", node, score);
389        }
390
391        // Target: < 5 seconds
392        assert!(elapsed.as_secs() < 5, "PageRank took {:?}", elapsed);
393    }
394
395    /// Benchmark: Connected Components on 1M edges
396    #[test]
397    #[ignore]
398    fn bench_components_1m_edges() {
399        use std::time::Instant;
400
401        let graph = create_large_graph(100_000, 10);
402
403        let start = Instant::now();
404        let result = ConnectedComponents::find(&graph);
405        let elapsed = start.elapsed();
406
407        eprintln!("Connected Components: {:?}", elapsed);
408        eprintln!("Found {} components", result.count);
409
410        // Target: < 5 seconds
411        assert!(
412            elapsed.as_secs() < 5,
413            "Connected Components took {:?}",
414            elapsed
415        );
416    }
417
418    /// Benchmark: Betweenness Centrality on smaller graph (O(V*E) complexity)
419    #[test]
420    #[ignore]
421    fn bench_betweenness_100k_edges() {
422        use std::time::Instant;
423
424        // Betweenness is O(V*E), so we use a smaller graph
425        let graph = create_large_graph(10_000, 10);
426
427        let start = Instant::now();
428        let result = BetweennessCentrality::compute(&graph, true);
429        let elapsed = start.elapsed();
430
431        eprintln!("Betweenness Centrality: {:?}", elapsed);
432        let max_score = result.top(1).first().map(|(_, s)| *s).unwrap_or(0.0);
433        eprintln!("Max centrality: {:.6}", max_score);
434
435        // Target: < 30 seconds for 100K edges
436        assert!(elapsed.as_secs() < 30, "Betweenness took {:?}", elapsed);
437    }
438
439    /// Benchmark: Community Detection on 1M edges
440    #[test]
441    #[ignore]
442    fn bench_communities_1m_edges() {
443        use std::time::Instant;
444
445        let graph = create_large_graph(100_000, 10);
446
447        let start = Instant::now();
448        let result = LabelPropagation::new().run(&graph);
449        let elapsed = start.elapsed();
450
451        eprintln!(
452            "Community Detection: {:?} ({} iterations)",
453            elapsed, result.iterations
454        );
455        eprintln!("Found {} communities", result.communities.len());
456
457        // Target: < 5 seconds
458        assert!(
459            elapsed.as_secs() < 5,
460            "Community Detection took {:?}",
461            elapsed
462        );
463    }
464
465    /// Benchmark: Cycle Detection on smaller graph (cycle detection is expensive)
466    #[test]
467    #[ignore]
468    fn bench_cycles_10k_edges() {
469        use std::time::Instant;
470
471        // Cycle detection is expensive, use smaller graph
472        let graph = create_large_graph(1_000, 10);
473
474        let start = Instant::now();
475        let result = CycleDetector::new()
476            .max_length(5)
477            .max_cycles(100)
478            .find(&graph);
479        let elapsed = start.elapsed();
480
481        eprintln!("Cycle Detection: {:?}", elapsed);
482        eprintln!(
483            "Found {} cycles (limit reached: {})",
484            result.cycles.len(),
485            result.limit_reached
486        );
487
488        // Target: < 10 seconds
489        assert!(elapsed.as_secs() < 10, "Cycle Detection took {:?}", elapsed);
490    }
491
492    /// Comprehensive benchmark suite
493    #[test]
494    #[ignore]
495    fn bench_full_suite() {
496        use std::time::Instant;
497
498        eprintln!("\n=== Graph Intelligence Benchmark Suite ===\n");
499
500        // Create test graphs
501        let small_graph = create_large_graph(1_000, 10);
502        let medium_graph = create_large_graph(10_000, 10);
503        let large_graph = create_large_graph(100_000, 10);
504
505        eprintln!(
506            "Small graph:  {} nodes, {} edges",
507            small_graph.stats().node_count,
508            small_graph.stats().edge_count
509        );
510        eprintln!(
511            "Medium graph: {} nodes, {} edges",
512            medium_graph.stats().node_count,
513            medium_graph.stats().edge_count
514        );
515        eprintln!(
516            "Large graph:  {} nodes, {} edges\n",
517            large_graph.stats().node_count,
518            large_graph.stats().edge_count
519        );
520
521        // PageRank benchmarks
522        let start = Instant::now();
523        let _ = PageRank::new().run(&small_graph);
524        eprintln!("PageRank (10K edges):  {:?}", start.elapsed());
525
526        let start = Instant::now();
527        let _ = PageRank::new().run(&medium_graph);
528        eprintln!("PageRank (100K edges): {:?}", start.elapsed());
529
530        let start = Instant::now();
531        let _ = PageRank::new().run(&large_graph);
532        eprintln!("PageRank (1M edges):   {:?}", start.elapsed());
533
534        eprintln!();
535
536        // Connected Components benchmarks
537        let start = Instant::now();
538        let _ = ConnectedComponents::find(&small_graph);
539        eprintln!("Components (10K edges):  {:?}", start.elapsed());
540
541        let start = Instant::now();
542        let _ = ConnectedComponents::find(&medium_graph);
543        eprintln!("Components (100K edges): {:?}", start.elapsed());
544
545        let start = Instant::now();
546        let _ = ConnectedComponents::find(&large_graph);
547        eprintln!("Components (1M edges):   {:?}", start.elapsed());
548
549        eprintln!();
550
551        // Community Detection benchmarks
552        let start = Instant::now();
553        let _ = LabelPropagation::new().run(&small_graph);
554        eprintln!("Communities (10K edges):  {:?}", start.elapsed());
555
556        let start = Instant::now();
557        let _ = LabelPropagation::new().run(&medium_graph);
558        eprintln!("Communities (100K edges): {:?}", start.elapsed());
559
560        let start = Instant::now();
561        let _ = LabelPropagation::new().run(&large_graph);
562        eprintln!("Communities (1M edges):   {:?}", start.elapsed());
563
564        eprintln!();
565
566        // Betweenness (only on smaller graphs due to O(V*E) complexity)
567        let start = Instant::now();
568        let _ = BetweennessCentrality::compute(&small_graph, true);
569        eprintln!("Betweenness (10K edges):  {:?}", start.elapsed());
570
571        let start = Instant::now();
572        let _ = BetweennessCentrality::compute(&medium_graph, true);
573        eprintln!("Betweenness (100K edges): {:?}", start.elapsed());
574
575        eprintln!("\n=== Benchmark Complete ===");
576    }
577}