scirs2_graph/algorithms/community/
hierarchical.rs

1//! Hierarchical community detection algorithms
2
3use super::modularity::modularity;
4use super::types::{CommunityResult, CommunityStructure};
5use crate::base::{EdgeWeight, Graph, IndexType, Node};
6use std::collections::{HashMap, HashSet};
7use std::hash::Hash;
8
9/// Hierarchical community structure using agglomerative clustering (legacy API)
10///
11/// **Note**: This function is deprecated in favor of `hierarchical_communities_result`.
12/// It will be removed in version 2.0.
13///
14/// This algorithm starts with each node as its own community and iteratively
15/// merges communities to maximize modularity. It builds a dendrogram-like
16/// structure showing the hierarchy of communities.
17///
18/// # Arguments
19/// * `graph` - The graph to analyze
20/// * `linkage` - Linkage criterion ("single", "complete", "average")
21///
22/// # Returns
23/// * A vector of community structures at different hierarchy levels
24#[deprecated(
25    since = "0.1.0-beta.2",
26    note = "Use `hierarchical_communities_result` instead"
27)]
28#[allow(dead_code)]
29pub fn hierarchical_communities<N, E, Ix>(
30    graph: &Graph<N, E, Ix>,
31    linkage: &str,
32) -> Vec<CommunityStructure<N>>
33where
34    N: Node + Clone + Hash + Eq + std::fmt::Debug,
35    E: EdgeWeight + Into<f64> + Copy,
36    Ix: IndexType,
37{
38    let nodes: Vec<N> = graph.nodes().into_iter().cloned().collect();
39    let n = nodes.len();
40
41    if n == 0 {
42        return vec![];
43    }
44
45    let mut results = Vec::new();
46
47    // Start with each node as its own community
48    let mut current_communities: HashMap<N, usize> = nodes
49        .iter()
50        .enumerate()
51        .map(|(i, node)| (node.clone(), i))
52        .collect();
53
54    // Record initial state
55    let initial_mod = modularity(graph, &current_communities);
56    results.push(CommunityStructure {
57        node_communities: current_communities.clone(),
58        modularity: initial_mod,
59    });
60
61    // Keep track of which communities exist
62    let mut active_communities: HashSet<usize> = (0..n).collect();
63
64    // Agglomerative merging
65    while active_communities.len() > 1 {
66        let mut best_merge: Option<(usize, usize)> = None;
67        let mut best_modularity = f64::NEG_INFINITY;
68
69        // Try all possible merges
70        let communities_vec: Vec<usize> = active_communities.iter().cloned().collect();
71        for i in 0..communities_vec.len() {
72            for j in (i + 1)..communities_vec.len() {
73                let comm1 = communities_vec[i];
74                let comm2 = communities_vec[j];
75
76                // Check if these communities are connected
77                if are_communities_connected(graph, &current_communities, comm1, comm2) {
78                    // Try merging these communities
79                    let mut test_communities = current_communities.clone();
80                    for (_, community) in test_communities.iter_mut() {
81                        if *community == comm2 {
82                            *community = comm1;
83                        }
84                    }
85
86                    let test_modularity = modularity(graph, &test_communities);
87
88                    // Use different criteria based on linkage
89                    let score = match linkage {
90                        "single" => {
91                            calculate_single_linkage(graph, &current_communities, comm1, comm2)
92                        }
93                        "complete" => {
94                            calculate_complete_linkage(graph, &current_communities, comm1, comm2)
95                        }
96                        "average" => {
97                            calculate_average_linkage(graph, &current_communities, comm1, comm2)
98                        }
99                        _ => test_modularity, // Default to modularity
100                    };
101
102                    if score > best_modularity {
103                        best_modularity = score;
104                        best_merge = Some((comm1, comm2));
105                    }
106                }
107            }
108        }
109
110        // Perform best merge
111        if let Some((comm1, comm2)) = best_merge {
112            // Merge comm2 into comm1
113            for (_, community) in current_communities.iter_mut() {
114                if *community == comm2 {
115                    *community = comm1;
116                }
117            }
118            active_communities.remove(&comm2);
119
120            // Record this level
121            let current_mod = modularity(graph, &current_communities);
122            results.push(CommunityStructure {
123                node_communities: current_communities.clone(),
124                modularity: current_mod,
125            });
126        } else {
127            // No more valid merges
128            break;
129        }
130    }
131
132    // Renumber all community structures
133    for result in &mut results {
134        let mut community_map: HashMap<usize, usize> = HashMap::new();
135        let mut next_id = 0;
136        for &comm in result.node_communities.values() {
137            if let std::collections::hash_map::Entry::Vacant(e) = community_map.entry(comm) {
138                e.insert(next_id);
139                next_id += 1;
140            }
141        }
142
143        // Apply renumbering
144        for (_, comm) in result.node_communities.iter_mut() {
145            *comm = community_map[comm];
146        }
147    }
148
149    results
150}
151
152/// Hierarchical community structure with standardized CommunityResult return type
153///
154/// This function provides the same functionality as `hierarchical_communities` but returns
155/// a vector of standardized `CommunityResult` types that provide multiple ways to access
156/// the community structure at each hierarchy level.
157///
158/// This algorithm starts with each node as its own community and iteratively
159/// merges communities to maximize modularity. It builds a dendrogram-like
160/// structure showing the hierarchy of communities.
161///
162/// # Arguments
163/// * `graph` - The graph to analyze
164/// * `linkage` - Linkage criterion ("single", "complete", "average")
165///
166/// # Returns
167/// * A vector of `CommunityResult`s representing different hierarchy levels
168///
169/// # Time Complexity
170/// O(n³) for complete linkage, O(n² * m) for single linkage, where n is the number
171/// of nodes and m is the number of edges. The algorithm builds a complete dendrogram
172/// by iteratively merging communities based on the specified linkage criterion.
173///
174/// # Space Complexity
175/// O(n²) for storing the distance matrix and the hierarchical structure at all levels.
176///
177/// # Example
178/// ```rust
179/// use scirs2_graph::{Graph, hierarchical_communities_result};
180///
181/// let mut graph: Graph<i32, f64> = Graph::new();
182/// // ... add nodes and edges ...
183/// let results = hierarchical_communities_result(&graph, "average");
184///
185/// for (level, result) in results.iter().enumerate() {
186///     println!("Level {}: {} communities", level, result.num_communities);
187/// }
188/// ```
189#[allow(dead_code)]
190pub fn hierarchical_communities_result<N, E, Ix>(
191    graph: &Graph<N, E, Ix>,
192    linkage: &str,
193) -> Vec<CommunityResult<N>>
194where
195    N: Node + Clone + Hash + Eq + std::fmt::Debug,
196    E: EdgeWeight + Into<f64> + Copy,
197    Ix: IndexType,
198{
199    #[allow(deprecated)]
200    let structures = hierarchical_communities(graph, linkage);
201    structures
202        .into_iter()
203        .map(CommunityResult::from_community_structure)
204        .collect()
205}
206
207/// Check if two communities are connected by at least one edge
208#[allow(dead_code)]
209fn are_communities_connected<N, E, Ix>(
210    graph: &Graph<N, E, Ix>,
211    communities: &HashMap<N, usize>,
212    comm1: usize,
213    comm2: usize,
214) -> bool
215where
216    N: Node + std::fmt::Debug,
217    E: EdgeWeight,
218    Ix: IndexType,
219{
220    for (node, &node_comm) in communities {
221        if node_comm == comm1 {
222            if let Ok(neighbors) = graph.neighbors(node) {
223                for neighbor in neighbors {
224                    if let Some(&neighbor_comm) = communities.get(&neighbor) {
225                        if neighbor_comm == comm2 {
226                            return true;
227                        }
228                    }
229                }
230            }
231        }
232    }
233    false
234}
235
236/// Calculate single linkage distance (minimum distance between communities)
237#[allow(dead_code)]
238fn calculate_single_linkage<N, E, Ix>(
239    graph: &Graph<N, E, Ix>,
240    communities: &HashMap<N, usize>,
241    comm1: usize,
242    comm2: usize,
243) -> f64
244where
245    N: Node + std::fmt::Debug,
246    E: EdgeWeight + Into<f64> + Copy,
247    Ix: IndexType,
248{
249    let mut min_distance = f64::INFINITY;
250
251    for (node1, &node1_comm) in communities {
252        if node1_comm == comm1 {
253            for (node2, &node2_comm) in communities {
254                if node2_comm == comm2 {
255                    if let Ok(weight) = graph.edge_weight(node1, node2) {
256                        let distance = 1.0 / (1.0 + weight.into()); // Convert weight to distance
257                        min_distance = min_distance.min(distance);
258                    }
259                }
260            }
261        }
262    }
263
264    if min_distance == f64::INFINITY {
265        0.0 // No direct connection
266    } else {
267        1.0 / min_distance // Convert back to similarity
268    }
269}
270
271/// Calculate complete linkage distance (maximum distance between communities)
272#[allow(dead_code)]
273fn calculate_complete_linkage<N, E, Ix>(
274    graph: &Graph<N, E, Ix>,
275    communities: &HashMap<N, usize>,
276    comm1: usize,
277    comm2: usize,
278) -> f64
279where
280    N: Node + std::fmt::Debug,
281    E: EdgeWeight + Into<f64> + Copy,
282    Ix: IndexType,
283{
284    let mut max_distance: f64 = 0.0;
285    let mut found_connection = false;
286
287    for (node1, &node1_comm) in communities {
288        if node1_comm == comm1 {
289            for (node2, &node2_comm) in communities {
290                if node2_comm == comm2 {
291                    if let Ok(weight) = graph.edge_weight(node1, node2) {
292                        let distance = 1.0 / (1.0 + weight.into());
293                        max_distance = max_distance.max(distance);
294                        found_connection = true;
295                    }
296                }
297            }
298        }
299    }
300
301    if found_connection {
302        1.0 / max_distance
303    } else {
304        0.0
305    }
306}
307
308/// Calculate average linkage distance (average distance between communities)
309#[allow(dead_code)]
310fn calculate_average_linkage<N, E, Ix>(
311    graph: &Graph<N, E, Ix>,
312    communities: &HashMap<N, usize>,
313    comm1: usize,
314    comm2: usize,
315) -> f64
316where
317    N: Node + std::fmt::Debug,
318    E: EdgeWeight + Into<f64> + Copy,
319    Ix: IndexType,
320{
321    let mut total_distance = 0.0;
322    let mut count = 0;
323
324    for (node1, &node1_comm) in communities {
325        if node1_comm == comm1 {
326            for (node2, &node2_comm) in communities {
327                if node2_comm == comm2 {
328                    if let Ok(weight) = graph.edge_weight(node1, node2) {
329                        let distance = 1.0 / (1.0 + weight.into());
330                        total_distance += distance;
331                        count += 1;
332                    }
333                }
334            }
335        }
336    }
337
338    if count > 0 {
339        1.0 / (total_distance / count as f64)
340    } else {
341        0.0
342    }
343}