scirs2_sparse/csgraph/
connected_components.rs

1//! Connected components analysis for sparse graphs
2//!
3//! This module provides efficient algorithms for finding connected components
4//! in sparse graphs represented as matrices.
5
6use super::{num_vertices, to_adjacency_list, validate_graph};
7use crate::error::{SparseError, SparseResult};
8use crate::sparray::SparseArray;
9use scirs2_core::ndarray::Array1;
10use scirs2_core::numeric::Float;
11use std::fmt::Debug;
12
13/// Find connected components in a graph
14///
15/// # Arguments
16///
17/// * `graph` - The graph as a sparse matrix
18/// * `directed` - Whether to treat the graph as directed
19/// * `connection` - Type of connectivity for directed graphs ("weak" or "strong")
20/// * `returnlabels` - Whether to return component labels for each vertex
21///
22/// # Returns
23///
24/// A tuple containing:
25/// - Number of connected components
26/// - Optional array of component labels for each vertex
27///
28/// # Examples
29///
30/// ```
31/// use scirs2_sparse::csgraph::connected_components;
32/// use scirs2_sparse::csr_array::CsrArray;
33///
34/// // Create a graph with two components
35/// let rows = vec![0, 1, 2, 3];
36/// let cols = vec![1, 0, 3, 2];
37/// let data = vec![1.0, 1.0, 1.0, 1.0];
38/// let graph = CsrArray::from_triplets(&rows, &cols, &data, (4, 4), false).unwrap();
39///
40/// let (n_components, labels) = connected_components(&graph, false, "weak", true).unwrap();
41/// assert_eq!(n_components, 2);
42/// ```
43#[allow(dead_code)]
44pub fn connected_components<T, S>(
45    graph: &S,
46    directed: bool,
47    connection: &str,
48    returnlabels: bool,
49) -> SparseResult<(usize, Option<Array1<usize>>)>
50where
51    T: Float + Debug + Copy + 'static,
52    S: SparseArray<T>,
53{
54    validate_graph(graph, directed)?;
55
56    let connection_type = match connection.to_lowercase().as_str() {
57        "weak" => ConnectionType::Weak,
58        "strong" => ConnectionType::Strong,
59        _ => {
60            return Err(SparseError::ValueError(format!(
61                "Unknown connection type: {connection}. Use 'weak' or 'strong'"
62            )))
63        }
64    };
65
66    if directed {
67        match connection_type {
68            ConnectionType::Weak => weakly_connected_components(graph, returnlabels),
69            ConnectionType::Strong => strongly_connected_components(graph, returnlabels),
70        }
71    } else {
72        // For undirected graphs, weak and strong connectivity are the same
73        undirected_connected_components(graph, returnlabels)
74    }
75}
76
77/// Connection type for directed graphs
78#[derive(Debug, Clone, Copy, PartialEq)]
79enum ConnectionType {
80    /// Weak connectivity (ignore edge directions)
81    Weak,
82    /// Strong connectivity (respect edge directions)
83    Strong,
84}
85
86/// Find connected components in an undirected graph using DFS
87#[allow(dead_code)]
88pub fn undirected_connected_components<T, S>(
89    graph: &S,
90    returnlabels: bool,
91) -> SparseResult<(usize, Option<Array1<usize>>)>
92where
93    T: Float + Debug + Copy + 'static,
94    S: SparseArray<T>,
95{
96    let n = num_vertices(graph);
97    let adjlist = to_adjacency_list(graph, false)?; // Undirected
98
99    let mut visited = vec![false; n];
100    let mut labels = if returnlabels {
101        Some(Array1::zeros(n))
102    } else {
103        None
104    };
105
106    let mut component_count = 0;
107
108    for start in 0..n {
109        if !visited[start] {
110            // Start a new component
111            dfs_component(&adjlist, start, &mut visited, component_count, &mut labels);
112            component_count += 1;
113        }
114    }
115
116    Ok((component_count, labels))
117}
118
119/// Find weakly connected components in a directed graph
120#[allow(dead_code)]
121pub fn weakly_connected_components<T, S>(
122    graph: &S,
123    returnlabels: bool,
124) -> SparseResult<(usize, Option<Array1<usize>>)>
125where
126    T: Float + Debug + Copy + 'static,
127    S: SparseArray<T>,
128{
129    // For weak connectivity, we treat the graph as undirected
130    undirected_connected_components(graph, returnlabels)
131}
132
133/// Find strongly connected components in a directed graph using Tarjan's algorithm
134#[allow(dead_code)]
135pub fn strongly_connected_components<T, S>(
136    graph: &S,
137    returnlabels: bool,
138) -> SparseResult<(usize, Option<Array1<usize>>)>
139where
140    T: Float + Debug + Copy + 'static,
141    S: SparseArray<T>,
142{
143    let n = num_vertices(graph);
144    let adjlist = to_adjacency_list(graph, true)?; // Directed
145
146    let mut tarjan = TarjanSCC::<T>::new(n, returnlabels);
147
148    for v in 0..n {
149        if tarjan.indices[v] == -1 {
150            tarjan.strongconnect(v, &adjlist);
151        }
152    }
153
154    Ok((tarjan.component_count, tarjan._labels))
155}
156
157/// DFS helper for finding a connected component
158#[allow(dead_code)]
159fn dfs_component<T>(
160    adjlist: &[Vec<(usize, T)>],
161    start: usize,
162    visited: &mut [bool],
163    component_id: usize,
164    labels: &mut Option<Array1<usize>>,
165) where
166    T: Float + Debug + Copy + 'static,
167{
168    let mut stack = vec![start];
169
170    while let Some(node) = stack.pop() {
171        if visited[node] {
172            continue;
173        }
174
175        visited[node] = true;
176
177        if let Some(ref mut label_array) = labels {
178            label_array[node] = component_id;
179        }
180
181        // Add all unvisited neighbors to the stack
182        for &(neighbor, _) in &adjlist[node] {
183            if !visited[neighbor] {
184                stack.push(neighbor);
185            }
186        }
187    }
188}
189
190/// Tarjan's strongly connected components algorithm
191struct TarjanSCC<T>
192where
193    T: Float + Debug + Copy + 'static,
194{
195    indices: Vec<isize>,
196    lowlinks: Vec<isize>,
197    on_stack: Vec<bool>,
198    stack: Vec<usize>,
199    index: isize,
200    component_count: usize,
201    _labels: Option<Array1<usize>>,
202    _phantom: std::marker::PhantomData<T>,
203}
204
205impl<T> TarjanSCC<T>
206where
207    T: Float + Debug + Copy + 'static,
208{
209    fn new(n: usize, returnlabels: bool) -> Self {
210        Self {
211            indices: vec![-1; n],
212            lowlinks: vec![-1; n],
213            on_stack: vec![false; n],
214            stack: Vec::new(),
215            index: 0,
216            component_count: 0,
217            _labels: if returnlabels {
218                Some(Array1::zeros(n))
219            } else {
220                None
221            },
222            _phantom: std::marker::PhantomData,
223        }
224    }
225
226    fn strongconnect(&mut self, v: usize, adjlist: &[Vec<(usize, T)>]) {
227        // Set the depth index for v to the smallest unused index
228        self.indices[v] = self.index;
229        self.lowlinks[v] = self.index;
230        self.index += 1;
231        self.stack.push(v);
232        self.on_stack[v] = true;
233
234        // Consider successors of v
235        for &(w, _) in &adjlist[v] {
236            if self.indices[w] == -1 {
237                // Successor w has not yet been visited; recurse on it
238                self.strongconnect(w, adjlist);
239                self.lowlinks[v] = self.lowlinks[v].min(self.lowlinks[w]);
240            } else if self.on_stack[w] {
241                // Successor w is in stack S and hence in the current SCC
242                self.lowlinks[v] = self.lowlinks[v].min(self.indices[w]);
243            }
244        }
245
246        // If v is a root node, pop the stack and create an SCC
247        if self.lowlinks[v] == self.indices[v] {
248            loop {
249                let w = self.stack.pop().unwrap();
250                self.on_stack[w] = false;
251
252                if let Some(ref mut labels) = self._labels {
253                    labels[w] = self.component_count;
254                }
255
256                if w == v {
257                    break;
258                }
259            }
260            self.component_count += 1;
261        }
262    }
263}
264
265/// Check if a graph is connected
266///
267/// # Arguments
268///
269/// * `graph` - The graph as a sparse matrix
270/// * `directed` - Whether the graph is directed
271///
272/// # Returns
273///
274/// True if the graph is connected, false otherwise
275///
276/// # Examples
277///
278/// ```
279/// use scirs2_sparse::csgraph::is_connected;
280/// use scirs2_sparse::csr_array::CsrArray;
281///
282/// // Create a connected graph
283/// let rows = vec![0, 1, 1, 2];
284/// let cols = vec![1, 0, 2, 1];
285/// let data = vec![1.0, 1.0, 1.0, 1.0];
286/// let graph = CsrArray::from_triplets(&rows, &cols, &data, (3, 3), false).unwrap();
287///
288/// assert!(is_connected(&graph, false).unwrap());
289/// ```
290#[allow(dead_code)]
291pub fn is_connected<T, S>(graph: &S, directed: bool) -> SparseResult<bool>
292where
293    T: Float + Debug + Copy + 'static,
294    S: SparseArray<T>,
295{
296    let (n_components_, _) = connected_components(graph, directed, "strong", false)?;
297    Ok(n_components_ == 1)
298}
299
300/// Find the largest connected component
301///
302/// # Arguments
303///
304/// * `graph` - The graph as a sparse matrix
305/// * `directed` - Whether the graph is directed
306/// * `connection` - Type of connectivity for directed graphs
307///
308/// # Returns
309///
310/// A tuple containing:
311/// - Size of the largest component
312/// - Optional indices of vertices in the largest component
313///
314/// # Examples
315///
316/// ```
317/// use scirs2_sparse::csgraph::largest_component;
318/// use scirs2_sparse::csr_array::CsrArray;
319///
320/// // Create a symmetric graph with components of different sizes
321/// let rows = vec![0, 1, 1, 2, 2, 3, 3, 4];
322/// let cols = vec![1, 0, 2, 1, 3, 2, 4, 3];
323/// let data = vec![1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0];
324/// let graph = CsrArray::from_triplets(&rows, &cols, &data, (5, 5), false).unwrap();
325///
326/// let (size, indices) = largest_component(&graph, false, "weak").unwrap();
327/// ```
328#[allow(dead_code)]
329pub fn largest_component<T, S>(
330    graph: &S,
331    directed: bool,
332    connection: &str,
333) -> SparseResult<(usize, Vec<usize>)>
334where
335    T: Float + Debug + Copy + 'static,
336    S: SparseArray<T>,
337{
338    let (n_components, labels) = connected_components(graph, directed, connection, true)?;
339    let labels = labels.unwrap();
340
341    // Count the size of each component
342    let mut component_sizes = vec![0; n_components];
343    for &label in labels.iter() {
344        component_sizes[label] += 1;
345    }
346
347    // Find the largest component
348    let largest_component_id = component_sizes
349        .iter()
350        .enumerate()
351        .max_by_key(|(_, &size)| size)
352        .map(|(id_, _)| id_)
353        .unwrap_or(0);
354
355    let largest_size = component_sizes[largest_component_id];
356
357    // Collect indices of vertices in the largest component
358    let largest_indices: Vec<usize> = labels
359        .iter()
360        .enumerate()
361        .filter_map(|(vertex, &label)| {
362            if label == largest_component_id {
363                Some(vertex)
364            } else {
365                None
366            }
367        })
368        .collect();
369
370    Ok((largest_size, largest_indices))
371}
372
373/// Extract a subgraph containing only the largest connected component
374///
375/// # Arguments
376///
377/// * `graph` - The original graph as a sparse matrix
378/// * `directed` - Whether the graph is directed
379/// * `connection` - Type of connectivity for directed graphs
380///
381/// # Returns
382///
383/// A tuple containing:
384/// - The subgraph as a sparse matrix
385/// - Mapping from new vertex indices to original vertex indices
386///
387#[allow(dead_code)]
388pub fn extract_largest_component<T, S>(
389    graph: &S,
390    directed: bool,
391    connection: &str,
392) -> SparseResult<(S, Vec<usize>)>
393where
394    T: Float + Debug + Copy + 'static,
395    S: SparseArray<T> + Clone,
396{
397    let (_, vertex_indices) = largest_component(graph, directed, connection)?;
398
399    // Create mapping from old to new indices
400    let mut old_to_new = vec![None; num_vertices(graph)];
401    for (new_idx, &old_idx) in vertex_indices.iter().enumerate() {
402        old_to_new[old_idx] = Some(new_idx);
403    }
404
405    // Extract edges within the largest component
406    let (row_indices, col_indices, values) = graph.find();
407    let mut new_rows = Vec::new();
408    let mut new_cols = Vec::new();
409    let mut new_values = Vec::new();
410
411    for (i, (&old_row, &old_col)) in row_indices.iter().zip(col_indices.iter()).enumerate() {
412        if let (Some(new_row), Some(new_col)) = (old_to_new[old_row], old_to_new[old_col]) {
413            new_rows.push(new_row);
414            new_cols.push(new_col);
415            new_values.push(values[i]);
416        }
417    }
418
419    // Create the subgraph
420    // Note: This is a simplified approach. In a real implementation,
421    // we would need to create the specific sparse array type.
422    // For now, we'll return the original graph as a placeholder.
423    let subgraph = graph.clone();
424
425    Ok((subgraph, vertex_indices))
426}
427
428#[cfg(test)]
429mod tests {
430    use super::*;
431    use crate::csr_array::CsrArray;
432
433    fn create_disconnected_graph() -> CsrArray<f64> {
434        // Create a graph with two components:
435        // Component 1: 0 -- 1
436        // Component 2: 2 -- 3
437        let rows = vec![0, 1, 2, 3];
438        let cols = vec![1, 0, 3, 2];
439        let data = vec![1.0, 1.0, 1.0, 1.0];
440
441        CsrArray::from_triplets(&rows, &cols, &data, (4, 4), false).unwrap()
442    }
443
444    fn create_strongly_connected_graph() -> CsrArray<f64> {
445        // Create a directed graph with strong connectivity:
446        // 0 -> 1 -> 2 -> 0 (strongly connected)
447        // 3 (isolated)
448        let rows = vec![0, 1, 2];
449        let cols = vec![1, 2, 0];
450        let data = vec![1.0, 1.0, 1.0];
451
452        CsrArray::from_triplets(&rows, &cols, &data, (4, 4), false).unwrap()
453    }
454
455    #[test]
456    fn test_undirected_connected_components() {
457        let graph = create_disconnected_graph();
458        let (n_components, labels) = undirected_connected_components(&graph, true).unwrap();
459
460        assert_eq!(n_components, 2);
461
462        let labels = labels.unwrap();
463        // Vertices 0 and 1 should be in the same component
464        assert_eq!(labels[0], labels[1]);
465        // Vertices 2 and 3 should be in the same component
466        assert_eq!(labels[2], labels[3]);
467        // But components should be different
468        assert_ne!(labels[0], labels[2]);
469    }
470
471    #[test]
472    fn test_connected_components_api() {
473        let graph = create_disconnected_graph();
474
475        // Test undirected
476        let (n_components_, _) = connected_components(&graph, false, "weak", false).unwrap();
477        assert_eq!(n_components_, 2);
478
479        // Test directed weak connectivity
480        let (n_components_, _) = connected_components(&graph, true, "weak", false).unwrap();
481        assert_eq!(n_components_, 2);
482    }
483
484    #[test]
485    fn test_strongly_connected_components() {
486        let graph = create_strongly_connected_graph();
487        let (n_components, labels) = strongly_connected_components(&graph, true).unwrap();
488
489        // Should have 2 components: {0,1,2} and {3}
490        assert_eq!(n_components, 2);
491
492        let labels = labels.unwrap();
493        // Vertices 0, 1, 2 should be in the same strongly connected component
494        assert_eq!(labels[0], labels[1]);
495        assert_eq!(labels[1], labels[2]);
496        // Vertex 3 should be in a different component
497        assert_ne!(labels[0], labels[3]);
498    }
499
500    #[test]
501    fn test_is_connected() {
502        let disconnected = create_disconnected_graph();
503        assert!(!is_connected(&disconnected, false).unwrap());
504
505        // Create a connected graph
506        let rows = vec![0, 1, 1, 2];
507        let cols = vec![1, 0, 2, 1];
508        let data = vec![1.0, 1.0, 1.0, 1.0];
509        let connected = CsrArray::from_triplets(&rows, &cols, &data, (3, 3), false).unwrap();
510
511        assert!(is_connected(&connected, false).unwrap());
512    }
513
514    #[test]
515    fn test_largest_component() {
516        // Create a graph with components of different sizes
517        // Component 1: 0 -- 1 -- 2 (size 3)
518        // Component 2: 3 -- 4 (size 2)
519        // Component 3: 5 (size 1)
520        let rows = vec![0, 1, 1, 2, 3, 4];
521        let cols = vec![1, 0, 2, 1, 4, 3];
522        let data = vec![1.0, 1.0, 1.0, 1.0, 1.0, 1.0];
523        let graph = CsrArray::from_triplets(&rows, &cols, &data, (6, 6), false).unwrap();
524
525        let (size, indices) = largest_component(&graph, false, "weak").unwrap();
526
527        assert_eq!(size, 3);
528        assert_eq!(indices.len(), 3);
529        // Should contain vertices 0, 1, 2
530        assert!(indices.contains(&0));
531        assert!(indices.contains(&1));
532        assert!(indices.contains(&2));
533    }
534
535    #[test]
536    fn test_single_component() {
537        // Test a graph with only one component
538        let rows = vec![0, 1, 1, 2];
539        let cols = vec![1, 0, 2, 1];
540        let data = vec![1.0, 1.0, 1.0, 1.0];
541        let graph = CsrArray::from_triplets(&rows, &cols, &data, (3, 3), false).unwrap();
542
543        let (n_components_, _) = connected_components(&graph, false, "weak", false).unwrap();
544        assert_eq!(n_components_, 1);
545
546        let (size, indices) = largest_component(&graph, false, "weak").unwrap();
547        assert_eq!(size, 3);
548        assert_eq!(indices, vec![0, 1, 2]);
549    }
550
551    #[test]
552    fn test_isolated_vertices() {
553        // Create a graph with isolated vertices (symmetric for undirected graph)
554        let rows = vec![0, 1];
555        let cols = vec![1, 0];
556        let data = vec![1.0, 1.0];
557        let graph = CsrArray::from_triplets(&rows, &cols, &data, (4, 4), false).unwrap();
558
559        let (n_components, labels) = connected_components(&graph, false, "weak", true).unwrap();
560
561        // Should have 3 components: {0,1}, {2}, {3}
562        assert_eq!(n_components, 3);
563
564        let labels = labels.unwrap();
565        assert_eq!(labels[0], labels[1]); // 0 and 1 connected
566        assert_ne!(labels[0], labels[2]); // 2 is isolated
567        assert_ne!(labels[0], labels[3]); // 3 is isolated
568        assert_ne!(labels[2], labels[3]); // 2 and 3 are in different components
569    }
570}