Skip to main content

drft/analyses/
connected_components.rs

1use super::{Analysis, AnalysisContext};
2use std::collections::{HashMap, HashSet, VecDeque};
3
4#[derive(Debug, Clone, serde::Serialize)]
5pub struct Component {
6    pub id: usize,
7    pub members: Vec<String>,
8}
9
10#[derive(Debug, Clone, serde::Serialize)]
11pub struct ConnectedComponentsResult {
12    pub component_count: usize,
13    pub components: Vec<Component>,
14}
15
16pub struct ConnectedComponents;
17
18impl Analysis for ConnectedComponents {
19    type Output = ConnectedComponentsResult;
20
21    fn name(&self) -> &str {
22        "connected-components"
23    }
24
25    fn run(&self, ctx: &AnalysisContext) -> ConnectedComponentsResult {
26        let graph = ctx.graph;
27        // Build undirected adjacency among real nodes
28        let real_nodes: Vec<&str> = graph
29            .nodes
30            .keys()
31            .filter(|p| graph.is_included_node(p))
32            .map(|s| s.as_str())
33            .collect();
34
35        let mut adj: HashMap<&str, HashSet<&str>> = HashMap::new();
36        for node in &real_nodes {
37            adj.entry(node).or_default();
38        }
39
40        for edge in &graph.edges {
41            if graph.is_internal_edge(edge) {
42                adj.entry(edge.source.as_str())
43                    .or_default()
44                    .insert(edge.target.as_str());
45                adj.entry(edge.target.as_str())
46                    .or_default()
47                    .insert(edge.source.as_str());
48            }
49        }
50
51        // BFS to find components
52        let mut visited: HashSet<&str> = HashSet::new();
53        let mut components = Vec::new();
54
55        // Sort real_nodes for deterministic iteration order
56        let mut sorted_nodes = real_nodes;
57        sorted_nodes.sort();
58
59        for &node in &sorted_nodes {
60            if visited.contains(node) {
61                continue;
62            }
63
64            let mut members = Vec::new();
65            let mut queue = VecDeque::new();
66            queue.push_back(node);
67            visited.insert(node);
68
69            while let Some(current) = queue.pop_front() {
70                members.push(current.to_string());
71                if let Some(neighbors) = adj.get(current) {
72                    let mut sorted_neighbors: Vec<&str> = neighbors.iter().copied().collect();
73                    sorted_neighbors.sort();
74                    for neighbor in sorted_neighbors {
75                        if visited.insert(neighbor) {
76                            queue.push_back(neighbor);
77                        }
78                    }
79                }
80            }
81
82            members.sort();
83            components.push(members);
84        }
85
86        // Sort components by size descending, then by first member
87        components.sort_by(|a, b| b.len().cmp(&a.len()).then_with(|| a[0].cmp(&b[0])));
88
89        let component_count = components.len();
90        let components = components
91            .into_iter()
92            .enumerate()
93            .map(|(i, members)| Component { id: i + 1, members })
94            .collect();
95
96        ConnectedComponentsResult {
97            component_count,
98            components,
99        }
100    }
101}
102
103#[cfg(test)]
104mod tests {
105    use super::*;
106    use crate::analyses::AnalysisContext;
107    use crate::config::Config;
108    use crate::graph::Graph;
109    use crate::graph::test_helpers::{make_edge, make_node};
110    use crate::graph::{Node, NodeType};
111    use std::collections::HashMap;
112    use std::path::Path;
113
114    fn make_ctx<'a>(graph: &'a Graph, config: &'a Config) -> AnalysisContext<'a> {
115        AnalysisContext {
116            graph,
117            root: Path::new("."),
118            config,
119            lockfile: None,
120        }
121    }
122
123    #[test]
124    fn single_component() {
125        let mut graph = Graph::new();
126        graph.add_node(make_node("a.md"));
127        graph.add_node(make_node("b.md"));
128        graph.add_node(make_node("c.md"));
129        graph.add_edge(make_edge("a.md", "b.md"));
130        graph.add_edge(make_edge("b.md", "c.md"));
131
132        let config = Config::defaults();
133        let result = ConnectedComponents.run(&make_ctx(&graph, &config));
134        assert_eq!(result.component_count, 1);
135        assert_eq!(result.components[0].members.len(), 3);
136    }
137
138    #[test]
139    fn two_components() {
140        let mut graph = Graph::new();
141        graph.add_node(make_node("a.md"));
142        graph.add_node(make_node("b.md"));
143        graph.add_node(make_node("c.md"));
144        graph.add_node(make_node("d.md"));
145        graph.add_edge(make_edge("a.md", "b.md"));
146        graph.add_edge(make_edge("c.md", "d.md"));
147
148        let config = Config::defaults();
149        let result = ConnectedComponents.run(&make_ctx(&graph, &config));
150        assert_eq!(result.component_count, 2);
151        assert_eq!(result.components[0].members.len(), 2);
152        assert_eq!(result.components[1].members.len(), 2);
153    }
154
155    #[test]
156    fn isolated_node() {
157        let mut graph = Graph::new();
158        graph.add_node(make_node("a.md"));
159        graph.add_node(make_node("b.md"));
160        graph.add_node(make_node("c.md"));
161        graph.add_edge(make_edge("a.md", "b.md"));
162
163        let config = Config::defaults();
164        let result = ConnectedComponents.run(&make_ctx(&graph, &config));
165        assert_eq!(result.component_count, 2);
166        assert_eq!(result.components[0].members, vec!["a.md", "b.md"]);
167        assert_eq!(result.components[1].members, vec!["c.md"]);
168    }
169
170    #[test]
171    fn excludes_external_nodes() {
172        let mut graph = Graph::new();
173        graph.add_node(make_node("a.md"));
174        graph.add_node(Node {
175            path: "https://example.com".into(),
176            node_type: NodeType::External,
177            hash: None,
178            graph: None,
179            is_graph: false,
180            metadata: HashMap::new(),
181            included: false,
182        });
183        graph.add_edge(make_edge("a.md", "https://example.com"));
184
185        let config = Config::defaults();
186        let result = ConnectedComponents.run(&make_ctx(&graph, &config));
187        assert_eq!(result.component_count, 1);
188        assert_eq!(result.components[0].members, vec!["a.md"]);
189    }
190
191    #[test]
192    fn empty_graph() {
193        let graph = Graph::new();
194        let config = Config::defaults();
195        let result = ConnectedComponents.run(&make_ctx(&graph, &config));
196        assert_eq!(result.component_count, 0);
197        assert!(result.components.is_empty());
198    }
199
200    #[test]
201    fn undirected_connectivity() {
202        let mut graph = Graph::new();
203        graph.add_node(make_node("a.md"));
204        graph.add_node(make_node("b.md"));
205        graph.add_node(make_node("c.md"));
206        graph.add_edge(make_edge("a.md", "b.md"));
207        graph.add_edge(make_edge("c.md", "b.md"));
208
209        let config = Config::defaults();
210        let result = ConnectedComponents.run(&make_ctx(&graph, &config));
211        assert_eq!(result.component_count, 1);
212        assert_eq!(result.components[0].members.len(), 3);
213    }
214
215    #[test]
216    fn sorted_by_size_descending() {
217        let mut graph = Graph::new();
218        graph.add_node(make_node("a.md"));
219        graph.add_node(make_node("b.md"));
220        graph.add_node(make_node("c.md"));
221        graph.add_node(make_node("d.md"));
222        graph.add_node(make_node("e.md"));
223        graph.add_edge(make_edge("a.md", "b.md"));
224        graph.add_edge(make_edge("a.md", "c.md"));
225
226        let config = Config::defaults();
227        let result = ConnectedComponents.run(&make_ctx(&graph, &config));
228        assert_eq!(result.component_count, 3);
229        assert_eq!(result.components[0].members.len(), 3);
230        assert_eq!(result.components[1].members.len(), 1);
231        assert_eq!(result.components[2].members.len(), 1);
232    }
233}