ruvector_mincut/cluster/
mod.rs

1//! Multi-level Cluster Hierarchy for Dynamic Minimum Cut
2//!
3//! Implements hierarchical clustering from the December 2024 paper.
4//! Enables efficient cut maintenance through recursive decomposition.
5
6pub mod hierarchy;
7
8use crate::graph::{DynamicGraph, EdgeId, VertexId};
9use std::collections::{HashMap, HashSet};
10use std::sync::Arc;
11
12/// A cluster at a specific level in the hierarchy
13#[derive(Debug, Clone)]
14pub struct Cluster {
15    /// Unique cluster ID
16    pub id: u64,
17    /// Level in hierarchy (0 = leaf level)
18    pub level: usize,
19    /// Vertices contained in this cluster
20    pub vertices: HashSet<VertexId>,
21    /// Boundary edges (edges leaving the cluster)
22    pub boundary_edges: Vec<EdgeId>,
23    /// Boundary size (cut value if this cluster were separated)
24    pub boundary_size: u64,
25    /// Parent cluster ID (None for root)
26    pub parent: Option<u64>,
27    /// Child cluster IDs (empty for leaf clusters)
28    pub children: Vec<u64>,
29}
30
31/// Multi-level cluster hierarchy
32pub struct ClusterHierarchy {
33    /// All clusters indexed by ID
34    pub clusters: HashMap<u64, Cluster>,
35    /// Root cluster ID
36    root_id: Option<u64>,
37    /// Number of levels
38    num_levels: usize,
39    /// Vertex to leaf cluster mapping
40    vertex_cluster: HashMap<VertexId, u64>,
41    /// Next cluster ID
42    next_id: u64,
43    /// Reference to graph
44    graph: Arc<DynamicGraph>,
45    /// Target cluster size at each level
46    target_sizes: Vec<usize>,
47}
48
49impl ClusterHierarchy {
50    /// Create a new hierarchy for the given graph
51    pub fn new(graph: Arc<DynamicGraph>) -> Self {
52        let mut hierarchy = Self {
53            clusters: HashMap::new(),
54            root_id: None,
55            num_levels: 0,
56            vertex_cluster: HashMap::new(),
57            next_id: 0,
58            graph,
59            target_sizes: Vec::new(),
60        };
61        hierarchy.rebuild();
62        hierarchy
63    }
64
65    /// Rebuild the entire hierarchy from scratch
66    pub fn rebuild(&mut self) {
67        self.clusters.clear();
68        self.vertex_cluster.clear();
69        self.next_id = 0;
70
71        let vertices = self.graph.vertices();
72        if vertices.is_empty() {
73            self.root_id = None;
74            self.num_levels = 0;
75            return;
76        }
77
78        // Compute number of levels: O(log n)
79        let n = vertices.len();
80        self.num_levels = (n as f64).log2().ceil() as usize + 1;
81
82        // Compute target sizes for each level
83        self.target_sizes = (0..self.num_levels)
84            .map(|l| 2usize.pow(l as u32).min(n))
85            .collect();
86
87        // Build leaf clusters (level 0)
88        let leaf_ids = self.build_leaf_clusters(&vertices);
89
90        // Build upper levels recursively
91        let mut current_level_ids = leaf_ids;
92        for level in 1..self.num_levels {
93            current_level_ids = self.build_level(level, &current_level_ids);
94            if current_level_ids.len() == 1 {
95                self.root_id = Some(current_level_ids[0]);
96                break;
97            }
98        }
99
100        if self.root_id.is_none() && !current_level_ids.is_empty() {
101            self.root_id = Some(current_level_ids[0]);
102        }
103    }
104
105    /// Build leaf clusters (each vertex is its own cluster initially)
106    fn build_leaf_clusters(&mut self, vertices: &[VertexId]) -> Vec<u64> {
107        vertices
108            .iter()
109            .map(|&v| {
110                let cluster_id = self.next_id;
111                self.next_id += 1;
112
113                // Compute boundary
114                let (boundary_edges, boundary_size) = self.compute_vertex_boundary(v);
115
116                let cluster = Cluster {
117                    id: cluster_id,
118                    level: 0,
119                    vertices: [v].into_iter().collect(),
120                    boundary_edges,
121                    boundary_size,
122                    parent: None,
123                    children: Vec::new(),
124                };
125
126                self.clusters.insert(cluster_id, cluster);
127                self.vertex_cluster.insert(v, cluster_id);
128                cluster_id
129            })
130            .collect()
131    }
132
133    /// Build a level by merging clusters from the previous level
134    fn build_level(&mut self, level: usize, child_ids: &[u64]) -> Vec<u64> {
135        // Group children into parent clusters
136        // Target: reduce number of clusters by factor of 2
137        let _target_count = (child_ids.len() + 1) / 2;
138        let mut parent_ids = Vec::new();
139
140        for chunk in child_ids.chunks(2) {
141            let parent_id = self.next_id;
142            self.next_id += 1;
143
144            // Merge child vertices
145            let mut vertices = HashSet::new();
146            for &child_id in chunk {
147                if let Some(child) = self.clusters.get_mut(&child_id) {
148                    vertices.extend(child.vertices.iter().copied());
149                    child.parent = Some(parent_id);
150                }
151            }
152
153            // Compute boundary for merged cluster
154            let (boundary_edges, boundary_size) = self.compute_cluster_boundary(&vertices);
155
156            let parent = Cluster {
157                id: parent_id,
158                level,
159                vertices,
160                boundary_edges,
161                boundary_size,
162                parent: None,
163                children: chunk.to_vec(),
164            };
165
166            self.clusters.insert(parent_id, parent);
167            parent_ids.push(parent_id);
168        }
169
170        parent_ids
171    }
172
173    /// Compute boundary edges and size for a single vertex
174    fn compute_vertex_boundary(&self, v: VertexId) -> (Vec<EdgeId>, u64) {
175        let mut boundary_edges = Vec::new();
176        let mut boundary_size = 0u64;
177
178        for edge in self.graph.edges() {
179            if edge.source == v || edge.target == v {
180                boundary_edges.push(edge.id);
181                boundary_size += 1;
182            }
183        }
184
185        (boundary_edges, boundary_size)
186    }
187
188    /// Compute boundary edges and size for a cluster
189    fn compute_cluster_boundary(&self, vertices: &HashSet<VertexId>) -> (Vec<EdgeId>, u64) {
190        let mut boundary_edges = Vec::new();
191        let mut boundary_size = 0u64;
192
193        for edge in self.graph.edges() {
194            let src_in = vertices.contains(&edge.source);
195            let tgt_in = vertices.contains(&edge.target);
196
197            // Edge crosses boundary if exactly one endpoint is inside
198            if src_in != tgt_in {
199                boundary_edges.push(edge.id);
200                boundary_size += 1;
201            }
202        }
203
204        (boundary_edges, boundary_size)
205    }
206
207    /// Handle edge insertion
208    pub fn insert_edge(&mut self, _edge_id: EdgeId, u: VertexId, v: VertexId) {
209        // Update boundaries for all clusters containing u or v
210        self.update_boundaries_for_vertices(&[u, v]);
211    }
212
213    /// Handle edge deletion
214    pub fn delete_edge(&mut self, _edge_id: EdgeId, u: VertexId, v: VertexId) {
215        // Update boundaries for all clusters containing u or v
216        self.update_boundaries_for_vertices(&[u, v]);
217    }
218
219    /// Update boundary information for clusters containing given vertices
220    fn update_boundaries_for_vertices(&mut self, vertices: &[VertexId]) {
221        // Find all clusters that need updating (traverse up the hierarchy)
222        let mut clusters_to_update = HashSet::new();
223
224        for &v in vertices {
225            if let Some(&cluster_id) = self.vertex_cluster.get(&v) {
226                let mut current = Some(cluster_id);
227                while let Some(id) = current {
228                    clusters_to_update.insert(id);
229                    current = self.clusters.get(&id).and_then(|c| c.parent);
230                }
231            }
232        }
233
234        // Update each cluster's boundary
235        for cluster_id in clusters_to_update {
236            if let Some(cluster) = self.clusters.get(&cluster_id) {
237                let vertices = cluster.vertices.clone();
238                let (boundary_edges, boundary_size) = self.compute_cluster_boundary(&vertices);
239
240                if let Some(cluster) = self.clusters.get_mut(&cluster_id) {
241                    cluster.boundary_edges = boundary_edges;
242                    cluster.boundary_size = boundary_size;
243                }
244            }
245        }
246    }
247
248    /// Find the smallest cluster containing both vertices
249    pub fn lowest_common_cluster(&self, u: VertexId, v: VertexId) -> Option<u64> {
250        let u_cluster = self.vertex_cluster.get(&u)?;
251        let v_cluster = self.vertex_cluster.get(&v)?;
252
253        // Build path from u to root
254        let mut u_path = HashSet::new();
255        let mut current = Some(*u_cluster);
256        while let Some(id) = current {
257            u_path.insert(id);
258            current = self.clusters.get(&id).and_then(|c| c.parent);
259        }
260
261        // Find first intersection with v's path
262        current = Some(*v_cluster);
263        while let Some(id) = current {
264            if u_path.contains(&id) {
265                return Some(id);
266            }
267            current = self.clusters.get(&id).and_then(|c| c.parent);
268        }
269
270        None
271    }
272
273    /// Get minimum boundary size across all clusters
274    pub fn min_boundary(&self) -> u64 {
275        self.clusters
276            .values()
277            .filter(|c| !c.vertices.is_empty() && c.vertices.len() < self.graph.num_vertices())
278            .map(|c| c.boundary_size)
279            .min()
280            .unwrap_or(u64::MAX)
281    }
282
283    /// Get cluster by ID
284    pub fn get_cluster(&self, id: u64) -> Option<&Cluster> {
285        self.clusters.get(&id)
286    }
287
288    /// Get number of levels
289    pub fn num_levels(&self) -> usize {
290        self.num_levels
291    }
292
293    /// Get root cluster
294    pub fn root(&self) -> Option<&Cluster> {
295        self.root_id.and_then(|id| self.clusters.get(&id))
296    }
297}
298
299#[cfg(test)]
300mod tests {
301    use super::*;
302
303    #[test]
304    fn test_empty_graph() {
305        let graph = Arc::new(DynamicGraph::new());
306        let hierarchy = ClusterHierarchy::new(graph);
307        assert_eq!(hierarchy.num_levels(), 0);
308        assert!(hierarchy.root().is_none());
309    }
310
311    #[test]
312    fn test_single_vertex() {
313        let graph = Arc::new(DynamicGraph::new());
314        graph.add_vertex(1);
315        let hierarchy = ClusterHierarchy::new(graph);
316        assert!(hierarchy.num_levels() >= 1);
317    }
318
319    #[test]
320    fn test_path_graph() {
321        let graph = Arc::new(DynamicGraph::new());
322        for i in 0..9 {
323            graph.insert_edge(i, i + 1, 1.0).unwrap();
324        }
325        let hierarchy = ClusterHierarchy::new(graph);
326        assert!(hierarchy.num_levels() > 1);
327        assert_eq!(hierarchy.min_boundary(), 1); // Path has min cut 1
328    }
329
330    #[test]
331    fn test_cycle_graph() {
332        let graph = Arc::new(DynamicGraph::new());
333        for i in 0..5 {
334            graph.insert_edge(i, (i + 1) % 5, 1.0).unwrap();
335        }
336        let hierarchy = ClusterHierarchy::new(graph);
337        assert_eq!(hierarchy.min_boundary(), 2); // Cycle has min cut 2
338    }
339
340    #[test]
341    fn test_lowest_common_cluster() {
342        let graph = Arc::new(DynamicGraph::new());
343        graph.insert_edge(0, 1, 1.0).unwrap();
344        graph.insert_edge(1, 2, 1.0).unwrap();
345
346        let hierarchy = ClusterHierarchy::new(graph);
347        let lcc = hierarchy.lowest_common_cluster(0, 2);
348        assert!(lcc.is_some());
349    }
350
351    #[test]
352    fn test_dynamic_update() {
353        let graph = Arc::new(DynamicGraph::new());
354        graph.insert_edge(0, 1, 1.0).unwrap();
355        graph.insert_edge(1, 2, 1.0).unwrap();
356
357        let mut hierarchy = ClusterHierarchy::new(Arc::clone(&graph));
358        let before = hierarchy.min_boundary();
359
360        // Add edge to form cycle
361        let edge_id = graph.insert_edge(0, 2, 1.0).unwrap();
362        hierarchy.insert_edge(edge_id, 0, 2);
363
364        let after = hierarchy.min_boundary();
365        assert!(after >= before); // Adding edge can only increase min cut
366    }
367}