Skip to main content

drft/analyses/
connected_components.rs

1use super::{Analysis, AnalysisContext};
2use std::collections::{HashMap, HashSet, VecDeque};
3
4#[derive(Debug, Clone, serde::Serialize)]
5pub struct Component {
6    pub id: usize,
7    pub members: Vec<String>,
8}
9
10#[derive(Debug, Clone, serde::Serialize)]
11pub struct ConnectedComponentsResult {
12    pub component_count: usize,
13    pub components: Vec<Component>,
14}
15
16pub struct ConnectedComponents;
17
18impl Analysis for ConnectedComponents {
19    type Output = ConnectedComponentsResult;
20
21    fn name(&self) -> &str {
22        "connected-components"
23    }
24
25    fn run(&self, ctx: &AnalysisContext) -> ConnectedComponentsResult {
26        let graph = ctx.graph;
27        // Build undirected adjacency among real nodes
28        let real_nodes: Vec<&str> = graph
29            .nodes
30            .keys()
31            .filter(|p| graph.is_file_node(p))
32            .map(|s| s.as_str())
33            .collect();
34
35        let mut adj: HashMap<&str, HashSet<&str>> = HashMap::new();
36        for node in &real_nodes {
37            adj.entry(node).or_default();
38        }
39
40        for edge in &graph.edges {
41            if graph.is_file_node(&edge.source) && graph.is_file_node(&edge.target) {
42                adj.entry(edge.source.as_str())
43                    .or_default()
44                    .insert(edge.target.as_str());
45                adj.entry(edge.target.as_str())
46                    .or_default()
47                    .insert(edge.source.as_str());
48            }
49        }
50
51        // BFS to find components
52        let mut visited: HashSet<&str> = HashSet::new();
53        let mut components = Vec::new();
54
55        // Sort real_nodes for deterministic iteration order
56        let mut sorted_nodes = real_nodes;
57        sorted_nodes.sort();
58
59        for &node in &sorted_nodes {
60            if visited.contains(node) {
61                continue;
62            }
63
64            let mut members = Vec::new();
65            let mut queue = VecDeque::new();
66            queue.push_back(node);
67            visited.insert(node);
68
69            while let Some(current) = queue.pop_front() {
70                members.push(current.to_string());
71                if let Some(neighbors) = adj.get(current) {
72                    let mut sorted_neighbors: Vec<&str> = neighbors.iter().copied().collect();
73                    sorted_neighbors.sort();
74                    for neighbor in sorted_neighbors {
75                        if visited.insert(neighbor) {
76                            queue.push_back(neighbor);
77                        }
78                    }
79                }
80            }
81
82            members.sort();
83            components.push(members);
84        }
85
86        // Sort components by size descending, then by first member
87        components.sort_by(|a, b| b.len().cmp(&a.len()).then_with(|| a[0].cmp(&b[0])));
88
89        let component_count = components.len();
90        let components = components
91            .into_iter()
92            .enumerate()
93            .map(|(i, members)| Component { id: i + 1, members })
94            .collect();
95
96        ConnectedComponentsResult {
97            component_count,
98            components,
99        }
100    }
101}
102
103#[cfg(test)]
104mod tests {
105    use super::*;
106    use crate::analyses::AnalysisContext;
107    use crate::config::Config;
108    use crate::graph::Graph;
109    use crate::graph::test_helpers::{make_edge, make_node};
110    use crate::graph::{Node, NodeType};
111    use std::collections::HashMap;
112    use std::path::Path;
113
114    fn make_ctx<'a>(graph: &'a Graph, config: &'a Config) -> AnalysisContext<'a> {
115        AnalysisContext {
116            graph,
117            root: Path::new("."),
118            config,
119            lockfile: None,
120        }
121    }
122
123    #[test]
124    fn single_component() {
125        let mut graph = Graph::new();
126        graph.add_node(make_node("a.md"));
127        graph.add_node(make_node("b.md"));
128        graph.add_node(make_node("c.md"));
129        graph.add_edge(make_edge("a.md", "b.md"));
130        graph.add_edge(make_edge("b.md", "c.md"));
131
132        let config = Config::defaults();
133        let result = ConnectedComponents.run(&make_ctx(&graph, &config));
134        assert_eq!(result.component_count, 1);
135        assert_eq!(result.components[0].members.len(), 3);
136    }
137
138    #[test]
139    fn two_components() {
140        let mut graph = Graph::new();
141        graph.add_node(make_node("a.md"));
142        graph.add_node(make_node("b.md"));
143        graph.add_node(make_node("c.md"));
144        graph.add_node(make_node("d.md"));
145        graph.add_edge(make_edge("a.md", "b.md"));
146        graph.add_edge(make_edge("c.md", "d.md"));
147
148        let config = Config::defaults();
149        let result = ConnectedComponents.run(&make_ctx(&graph, &config));
150        assert_eq!(result.component_count, 2);
151        assert_eq!(result.components[0].members.len(), 2);
152        assert_eq!(result.components[1].members.len(), 2);
153    }
154
155    #[test]
156    fn isolated_node() {
157        let mut graph = Graph::new();
158        graph.add_node(make_node("a.md"));
159        graph.add_node(make_node("b.md"));
160        graph.add_node(make_node("c.md"));
161        graph.add_edge(make_edge("a.md", "b.md"));
162
163        let config = Config::defaults();
164        let result = ConnectedComponents.run(&make_ctx(&graph, &config));
165        assert_eq!(result.component_count, 2);
166        assert_eq!(result.components[0].members, vec!["a.md", "b.md"]);
167        assert_eq!(result.components[1].members, vec!["c.md"]);
168    }
169
170    #[test]
171    fn excludes_external_nodes() {
172        let mut graph = Graph::new();
173        graph.add_node(make_node("a.md"));
174        graph.add_node(Node {
175            path: "https://example.com".into(),
176            node_type: NodeType::External,
177            hash: None,
178            graph: None,
179            is_graph: false,
180            metadata: HashMap::new(),
181        });
182        graph.add_edge(make_edge("a.md", "https://example.com"));
183
184        let config = Config::defaults();
185        let result = ConnectedComponents.run(&make_ctx(&graph, &config));
186        assert_eq!(result.component_count, 1);
187        assert_eq!(result.components[0].members, vec!["a.md"]);
188    }
189
190    #[test]
191    fn empty_graph() {
192        let graph = Graph::new();
193        let config = Config::defaults();
194        let result = ConnectedComponents.run(&make_ctx(&graph, &config));
195        assert_eq!(result.component_count, 0);
196        assert!(result.components.is_empty());
197    }
198
199    #[test]
200    fn undirected_connectivity() {
201        let mut graph = Graph::new();
202        graph.add_node(make_node("a.md"));
203        graph.add_node(make_node("b.md"));
204        graph.add_node(make_node("c.md"));
205        graph.add_edge(make_edge("a.md", "b.md"));
206        graph.add_edge(make_edge("c.md", "b.md"));
207
208        let config = Config::defaults();
209        let result = ConnectedComponents.run(&make_ctx(&graph, &config));
210        assert_eq!(result.component_count, 1);
211        assert_eq!(result.components[0].members.len(), 3);
212    }
213
214    #[test]
215    fn sorted_by_size_descending() {
216        let mut graph = Graph::new();
217        graph.add_node(make_node("a.md"));
218        graph.add_node(make_node("b.md"));
219        graph.add_node(make_node("c.md"));
220        graph.add_node(make_node("d.md"));
221        graph.add_node(make_node("e.md"));
222        graph.add_edge(make_edge("a.md", "b.md"));
223        graph.add_edge(make_edge("a.md", "c.md"));
224
225        let config = Config::defaults();
226        let result = ConnectedComponents.run(&make_ctx(&graph, &config));
227        assert_eq!(result.component_count, 3);
228        assert_eq!(result.components[0].members.len(), 3);
229        assert_eq!(result.components[1].members.len(), 1);
230        assert_eq!(result.components[2].members.len(), 1);
231    }
232}