Skip to main content

sqlitegraph/algo/
community.rs

1//! Community detection algorithms for graph analysis.
2//!
3//! This module provides algorithms for discovering communities (clusters) in graphs.
4//! Community detection groups nodes that are more densely connected to each other
5//! than to nodes outside the group.
6//!
7//! # Available Algorithms
8//!
9//! - [`label_propagation`] - Fast label propagation for community discovery
10//! - [`louvain_communities`] - Louvain method for modularity optimization
11//!
12//! # When to Use Community Detection
13//!
14//! - **Label Propagation**: Fast community detection on large graphs, exploratory
15//!   analysis where speed matters more than quality, baseline comparison for other
16//!   clustering methods, incremental clustering where results update frequently
17//! - **Louvain**: High-quality community detection where modularity matters,
18//!   hierarchical clustering to reveal multi-scale structure, research applications
19//!   requiring reproducible results, final clustering when offline computation is
20//!   acceptable
21
22use ahash::AHashMap;
23
24use crate::progress::ProgressCallback;
25use crate::{errors::SqliteGraphError, graph::SqliteGraph};
26
27/// Label Propagation algorithm for fast community detection.
28///
29/// Each node starts with its own label, then iteratively adopts the most frequent
30/// label among its neighbors. Converges when no labels change or max_iterations reached.
31///
32/// This is a near-linear time algorithm suitable for large graphs. Uses deterministic
33/// tiebreaking (smallest label wins) for reproducible results.
34///
35/// # Arguments
36/// * `graph` - The graph to analyze
37/// * `max_iterations` - Maximum number of iterations to prevent infinite loops (typically 5-10)
38///
39/// # Returns
40/// Communities as vectors of node IDs, sorted by smallest node ID in each community.
41///
42/// # Complexity
43/// Time: O(k * |E|) where k = iterations (typically 5-10)
44/// Space: O(|V|) for label storage
45///
46/// # Algorithm Details
47/// - Initialize each node with unique label (node ID)
48/// - Iteratively adopt most frequent neighbor label
49/// - Bidirectional edges (both incoming and outgoing neighbors)
50/// - Deterministic tiebreaking: smallest label wins
51/// - Early stopping when converged (no labels change)
52///
53/// # References
54/// - Raghavan, U. N., Albert, R., & Kumara, S. (2007). "Near linear time algorithm to detect community structures in large-scale networks."
55///
56/// # Example
57/// ```rust
58/// use sqlitegraph::{SqliteGraph, algo::label_propagation};
59/// let graph = SqliteGraph::open_in_memory()?;
60/// // ... add nodes and edges ...
61/// let communities = label_propagation(&graph, 10)?;
62/// ```
63pub fn label_propagation(
64    graph: &SqliteGraph,
65    max_iterations: usize,
66) -> Result<Vec<Vec<i64>>, SqliteGraphError> {
67    let all_ids = graph.all_entity_ids()?;
68
69    if all_ids.is_empty() {
70        return Ok(Vec::new());
71    }
72
73    // Initialize: each node gets its own label
74    let mut labels: AHashMap<i64, i64> = all_ids.iter().map(|&id| (id, id)).collect();
75
76    // For deterministic results, process nodes in sorted order
77    let mut node_order: Vec<i64> = all_ids.clone();
78    node_order.sort();
79
80    // Iterative label propagation
81    for _iteration in 0..max_iterations {
82        let mut any_changed = false;
83
84        for &node in &node_order {
85            // Count neighbor labels
86            let mut label_counts: AHashMap<i64, usize> = AHashMap::new();
87
88            // Count outgoing neighbors
89            for &neighbor in &graph.fetch_outgoing(node)? {
90                let neighbor_label = labels.get(&neighbor).unwrap_or(&neighbor);
91                *label_counts.entry(*neighbor_label).or_insert(0) += 1;
92            }
93
94            // Count incoming neighbors
95            for &neighbor in &graph.fetch_incoming(node)? {
96                let neighbor_label = labels.get(&neighbor).unwrap_or(&neighbor);
97                *label_counts.entry(*neighbor_label).or_insert(0) += 1;
98            }
99
100            // Find most frequent label (deterministic tiebreak: smallest label)
101            if let Some((&_most_frequent_label, _)) = label_counts
102                .iter()
103                .max_by_key(|(_, count)| *count)
104                .map(|(label, count)| (label, *count))
105            {
106                // In case of ties, max_by_key returns arbitrary one
107                // So we need to find all with max count and take smallest label
108                let max_count = *label_counts.values().max().unwrap_or(&0);
109                let best_label = label_counts
110                    .iter()
111                    .filter(|(_, count)| **count == max_count)
112                    .map(|(&label, _)| label)
113                    .min()
114                    .unwrap_or(node);
115
116                if let Some(current_label) = labels.get(&node) {
117                    if *current_label != best_label {
118                        labels.insert(node, best_label);
119                        any_changed = true;
120                    }
121                }
122            }
123        }
124
125        if !any_changed {
126            break;
127        }
128    }
129
130    // Group nodes by final label
131    let mut communities_map: AHashMap<i64, Vec<i64>> = AHashMap::new();
132    for (node, label) in &labels {
133        communities_map
134            .entry(*label)
135            .or_insert_with(Vec::new)
136            .push(*node);
137    }
138
139    // Convert to sorted vector of communities
140    let mut communities: Vec<Vec<i64>> = communities_map.into_values().collect();
141    for community in &mut communities {
142        community.sort();
143    }
144    communities.sort_by(|a, b| a.first().cmp(&b.first()));
145
146    Ok(communities)
147}
148
149/// Louvain method for community detection via modularity optimization.
150///
151/// Iteratively moves nodes to maximize modularity (how many edges are within
152/// communities vs between communities). This is a simplified single-pass version.
153///
154/// # Arguments
155/// * `graph` - The graph to analyze
156/// * `max_iterations` - Maximum number of iterations to prevent infinite loops (typically 10-20)
157///
158/// # Returns
159/// Communities as vectors of node IDs, sorted by smallest node ID in each community.
160///
161/// # Complexity
162/// Time: O(k * |V| * |E|) where k = iterations
163/// Space: O(|V|) for community assignments and degrees
164///
165/// # Algorithm Details
166/// Simplified single-pass modularity optimization (no multi-level aggregation):
167/// 1. Initialize each node in its own community
168/// 2. Calculate total edges (m) and node degrees
169/// 3. Iteratively move nodes to maximize modularity delta:
170///    ΔQ = (2*edges_to_community - node_degree*community_degree/m) / (2*m)
171/// 4. Stop when no moves improve modularity
172///
173/// Modularity measures edge density within communities vs random expectation.
174/// Higher values indicate better community structure (typical range: 0.3-0.7).
175///
176/// # Caveats
177/// - Simplified version (no multi-level aggregation)
178/// - May converge to local optima (not guaranteed global optimum)
179/// - Performance depends on graph structure and edge distribution
180///
181/// # References
182/// - Blondel, V. D., et al. (2008). "Fast unfolding of communities in large networks."
183///
184/// # Example
185/// ```rust
186/// use sqlitegraph::{SqliteGraph, algo::louvain_communities};
187/// let graph = SqliteGraph::open_in_memory()?;
188/// // ... add nodes and edges ...
189/// let communities = louvain_communities(&graph, 10)?;
190/// ```
191pub fn louvain_communities(
192    graph: &SqliteGraph,
193    max_iterations: usize,
194) -> Result<Vec<Vec<i64>>, SqliteGraphError> {
195    let all_ids = graph.all_entity_ids()?;
196
197    if all_ids.is_empty() {
198        return Ok(Vec::new());
199    }
200
201    // Calculate total edges (m) and node degrees
202    let mut total_edges = 0usize;
203    let mut degrees: AHashMap<i64, usize> = AHashMap::new();
204
205    for &id in &all_ids {
206        let out_count = graph.fetch_outgoing(id)?.len();
207        let in_count = graph.fetch_incoming(id)?.len();
208        let degree = out_count + in_count;
209        degrees.insert(id, degree);
210        total_edges += degree;
211    }
212
213    // Total edges m (undirected: each edge counted twice, so m = sum_degrees / 2)
214    let m = total_edges as f64 / 2.0;
215
216    if m == 0.0 {
217        // No edges - each node is its own community
218        let mut communities: Vec<Vec<i64>> = all_ids.iter().map(|&id| vec![id]).collect();
219        communities.sort();
220        return Ok(communities);
221    }
222
223    // Initialize: each node in its own community
224    let mut communities: AHashMap<i64, i64> = all_ids.iter().map(|&id| (id, id)).collect();
225
226    // For deterministic results, process nodes in sorted order
227    let mut node_order: Vec<i64> = all_ids.clone();
228    node_order.sort();
229
230    // Iterative modularity optimization
231    for _iteration in 0..max_iterations {
232        let mut any_moved = false;
233
234        for &node in &node_order {
235            let current_community = *communities.get(&node).unwrap_or(&node);
236            let node_degree = *degrees.get(&node).unwrap_or(&0) as f64;
237
238            // Find neighbor communities
239            let mut community_connections: AHashMap<i64, f64> = AHashMap::new();
240
241            // Count outgoing edges
242            for &neighbor in &graph.fetch_outgoing(node)? {
243                let neighbor_community = *communities.get(&neighbor).unwrap_or(&neighbor);
244                *community_connections
245                    .entry(neighbor_community)
246                    .or_insert(0.0) += 1.0;
247            }
248
249            // Count incoming edges
250            for &neighbor in &graph.fetch_incoming(node)? {
251                let neighbor_community = *communities.get(&neighbor).unwrap_or(&neighbor);
252                *community_connections
253                    .entry(neighbor_community)
254                    .or_insert(0.0) += 1.0;
255            }
256
257            // Calculate modularity delta for moving to each neighbor's community
258            let mut best_community = current_community;
259            let mut best_delta = 0.0f64;
260
261            for (&target_community, &edges_to_community) in &community_connections {
262                if target_community == current_community {
263                    continue;
264                }
265
266                // Calculate sum of degrees in target community
267                let community_degree: f64 = communities
268                    .iter()
269                    .filter(|(_, comm)| **comm == target_community)
270                    .map(|(&node, _)| *degrees.get(&node).unwrap_or(&0) as f64)
271                    .sum();
272
273                // Modularity delta formula:
274                // ΔQ = (edges_in / m) - (edges_total / m)^2
275                // Simplified for single node move:
276                // ΔQ = [(2*edges_to_community - node_degree*community_degree/m) / (2*m)]
277
278                let delta =
279                    (2.0 * edges_to_community - node_degree * community_degree / m) / (2.0 * m);
280
281                if delta > best_delta {
282                    best_delta = delta;
283                    best_community = target_community;
284                }
285            }
286
287            // Move node if it improves modularity
288            if best_community != current_community {
289                communities.insert(node, best_community);
290                any_moved = true;
291            }
292        }
293
294        if !any_moved {
295            break;
296        }
297    }
298
299    // Group nodes by final community
300    let mut communities_map: AHashMap<i64, Vec<i64>> = AHashMap::new();
301    for (node, community) in &communities {
302        communities_map
303            .entry(*community)
304            .or_insert_with(Vec::new)
305            .push(*node);
306    }
307
308    // Convert to sorted vector of communities
309    let mut result: Vec<Vec<i64>> = communities_map.into_values().collect();
310    for community in &mut result {
311        community.sort();
312    }
313    result.sort_by(|a, b| a.first().cmp(&b.first()));
314
315    Ok(result)
316}
317
318/// Louvain method for community detection with progress callback reporting.
319///
320/// This is the progress-reporting variant of [`louvain_communities`]. See that function
321/// for full algorithm documentation.
322///
323/// # Arguments
324/// * `graph` - The graph to analyze
325/// * `max_iterations` - Maximum number of iterations
326/// * `progress` - Callback for progress updates
327///
328/// # Progress Reporting
329/// - Reports progress for each iteration pass: "Louvain pass X"
330/// - Total is None (convergence unknown)
331/// - Calls `on_complete()` when finished or converged
332/// - Calls `on_error()` if an error occurs
333///
334/// # Example
335///
336/// ```rust
337/// use sqlitegraph::{SqliteGraph, algo::louvain_communities_with_progress};
338/// use sqlitegraph::progress::NoProgress;
339///
340/// let graph = SqliteGraph::open_in_memory()?;
341/// // ... add nodes and edges ...
342/// let progress = NoProgress;
343/// let communities = louvain_communities_with_progress(&graph, 10, &progress)?;
344/// ```
345pub fn louvain_communities_with_progress<F>(
346    graph: &SqliteGraph,
347    max_iterations: usize,
348    progress: &F,
349) -> Result<Vec<Vec<i64>>, SqliteGraphError>
350where
351    F: ProgressCallback,
352{
353    let all_ids = graph.all_entity_ids()?;
354
355    if all_ids.is_empty() {
356        progress.on_complete();
357        return Ok(Vec::new());
358    }
359
360    // Calculate total edges (m) and node degrees
361    let mut total_edges = 0usize;
362    let mut degrees: AHashMap<i64, usize> = AHashMap::new();
363
364    for &id in &all_ids {
365        let out_count = graph.fetch_outgoing(id)?.len();
366        let in_count = graph.fetch_incoming(id)?.len();
367        let degree = out_count + in_count;
368        degrees.insert(id, degree);
369        total_edges += degree;
370    }
371
372    // Total edges m (undirected: each edge counted twice, so m = sum_degrees / 2)
373    let m = total_edges as f64 / 2.0;
374
375    if m == 0.0 {
376        progress.on_complete();
377        // No edges - each node is its own community
378        let mut communities: Vec<Vec<i64>> = all_ids.iter().map(|&id| vec![id]).collect();
379        communities.sort();
380        return Ok(communities);
381    }
382
383    // Initialize: each node in its own community
384    let mut communities: AHashMap<i64, i64> = all_ids.iter().map(|&id| (id, id)).collect();
385
386    // For deterministic results, process nodes in sorted order
387    let mut node_order: Vec<i64> = all_ids.clone();
388    node_order.sort();
389
390    // Iterative modularity optimization with progress reporting
391    for iteration in 0..max_iterations {
392        progress.on_progress(
393            iteration + 1,
394            None,
395            &format!("Louvain pass {}", iteration + 1),
396        );
397
398        let mut any_moved = false;
399
400        for &node in &node_order {
401            let current_community = *communities.get(&node).unwrap_or(&node);
402            let node_degree = *degrees.get(&node).unwrap_or(&0) as f64;
403
404            // Find neighbor communities
405            let mut community_connections: AHashMap<i64, f64> = AHashMap::new();
406
407            // Count outgoing edges
408            for &neighbor in &graph.fetch_outgoing(node)? {
409                let neighbor_community = *communities.get(&neighbor).unwrap_or(&neighbor);
410                *community_connections
411                    .entry(neighbor_community)
412                    .or_insert(0.0) += 1.0;
413            }
414
415            // Count incoming edges
416            for &neighbor in &graph.fetch_incoming(node)? {
417                let neighbor_community = *communities.get(&neighbor).unwrap_or(&neighbor);
418                *community_connections
419                    .entry(neighbor_community)
420                    .or_insert(0.0) += 1.0;
421            }
422
423            // Calculate modularity delta for moving to each neighbor's community
424            let mut best_community = current_community;
425            let mut best_delta = 0.0f64;
426
427            for (&target_community, &edges_to_community) in &community_connections {
428                if target_community == current_community {
429                    continue;
430                }
431
432                // Calculate sum of degrees in target community
433                let community_degree: f64 = communities
434                    .iter()
435                    .filter(|(_, comm)| **comm == target_community)
436                    .map(|(&node, _)| *degrees.get(&node).unwrap_or(&0) as f64)
437                    .sum();
438
439                // Modularity delta formula:
440                // ΔQ = (edges_in / m) - (edges_total / m)^2
441                // Simplified for single node move:
442                // ΔQ = [(2*edges_to_community - node_degree*community_degree/m) / (2*m)]
443
444                let delta =
445                    (2.0 * edges_to_community - node_degree * community_degree / m) / (2.0 * m);
446
447                if delta > best_delta {
448                    best_delta = delta;
449                    best_community = target_community;
450                }
451            }
452
453            // Move node if it improves modularity
454            if best_community != current_community {
455                communities.insert(node, best_community);
456                any_moved = true;
457            }
458        }
459
460        if !any_moved {
461            break;
462        }
463    }
464
465    progress.on_complete();
466
467    // Group nodes by final community
468    let mut communities_map: AHashMap<i64, Vec<i64>> = AHashMap::new();
469    for (node, community) in &communities {
470        communities_map
471            .entry(*community)
472            .or_insert_with(Vec::new)
473            .push(*node);
474    }
475
476    // Convert to sorted vector of communities
477    let mut result: Vec<Vec<i64>> = communities_map.into_values().collect();
478    for community in &mut result {
479        community.sort();
480    }
481    result.sort_by(|a, b| a.first().cmp(&b.first()));
482
483    Ok(result)
484}