Skip to main content

drft/analyses/
connected_components.rs

1use super::{Analysis, AnalysisContext};
2use std::collections::{HashMap, HashSet, VecDeque};
3
4#[derive(Debug, Clone, serde::Serialize)]
5pub struct Component {
6    pub id: usize,
7    pub members: Vec<String>,
8}
9
10#[derive(Debug, Clone, serde::Serialize)]
11pub struct ConnectedComponentsResult {
12    pub component_count: usize,
13    pub components: Vec<Component>,
14}
15
16pub struct ConnectedComponents;
17
18impl Analysis for ConnectedComponents {
19    type Output = ConnectedComponentsResult;
20
21    fn name(&self) -> &str {
22        "connected-components"
23    }
24
25    fn run(&self, ctx: &AnalysisContext) -> ConnectedComponentsResult {
26        let graph = ctx.graph;
27        // Build undirected adjacency among real nodes
28        let real_nodes: Vec<&str> = graph.included_nodes().map(|(s, _)| s.as_str()).collect();
29
30        let mut adj: HashMap<&str, HashSet<&str>> = HashMap::new();
31        for node in &real_nodes {
32            adj.entry(node).or_default();
33        }
34
35        for edge in &graph.edges {
36            if graph.is_internal_edge(edge) {
37                adj.entry(edge.source.as_str())
38                    .or_default()
39                    .insert(edge.target.as_str());
40                adj.entry(edge.target.as_str())
41                    .or_default()
42                    .insert(edge.source.as_str());
43            }
44        }
45
46        // BFS to find components
47        let mut visited: HashSet<&str> = HashSet::new();
48        let mut components = Vec::new();
49
50        // Sort real_nodes for deterministic iteration order
51        let mut sorted_nodes = real_nodes;
52        sorted_nodes.sort();
53
54        for &node in &sorted_nodes {
55            if visited.contains(node) {
56                continue;
57            }
58
59            let mut members = Vec::new();
60            let mut queue = VecDeque::new();
61            queue.push_back(node);
62            visited.insert(node);
63
64            while let Some(current) = queue.pop_front() {
65                members.push(current.to_string());
66                if let Some(neighbors) = adj.get(current) {
67                    let mut sorted_neighbors: Vec<&str> = neighbors.iter().copied().collect();
68                    sorted_neighbors.sort();
69                    for neighbor in sorted_neighbors {
70                        if visited.insert(neighbor) {
71                            queue.push_back(neighbor);
72                        }
73                    }
74                }
75            }
76
77            members.sort();
78            components.push(members);
79        }
80
81        // Sort components by size descending, then by first member
82        components.sort_by(|a, b| b.len().cmp(&a.len()).then_with(|| a[0].cmp(&b[0])));
83
84        let component_count = components.len();
85        let components = components
86            .into_iter()
87            .enumerate()
88            .map(|(i, members)| Component { id: i + 1, members })
89            .collect();
90
91        ConnectedComponentsResult {
92            component_count,
93            components,
94        }
95    }
96}
97
98#[cfg(test)]
99mod tests {
100    use super::*;
101    use crate::analyses::AnalysisContext;
102    use crate::config::Config;
103    use crate::graph::Graph;
104    use crate::graph::test_helpers::{make_edge, make_node};
105    use crate::graph::{Edge, Node, NodeType};
106    use std::collections::HashMap;
107    use std::path::Path;
108
109    fn make_ctx<'a>(graph: &'a Graph, config: &'a Config) -> AnalysisContext<'a> {
110        AnalysisContext {
111            graph,
112            root: Path::new("."),
113            config,
114            lockfile: None,
115        }
116    }
117
118    #[test]
119    fn single_component() {
120        let mut graph = Graph::new();
121        graph.add_node(make_node("a.md"));
122        graph.add_node(make_node("b.md"));
123        graph.add_node(make_node("c.md"));
124        graph.add_edge(make_edge("a.md", "b.md"));
125        graph.add_edge(make_edge("b.md", "c.md"));
126
127        let config = Config::defaults();
128        let result = ConnectedComponents.run(&make_ctx(&graph, &config));
129        assert_eq!(result.component_count, 1);
130        assert_eq!(result.components[0].members.len(), 3);
131    }
132
133    #[test]
134    fn two_components() {
135        let mut graph = Graph::new();
136        graph.add_node(make_node("a.md"));
137        graph.add_node(make_node("b.md"));
138        graph.add_node(make_node("c.md"));
139        graph.add_node(make_node("d.md"));
140        graph.add_edge(make_edge("a.md", "b.md"));
141        graph.add_edge(make_edge("c.md", "d.md"));
142
143        let config = Config::defaults();
144        let result = ConnectedComponents.run(&make_ctx(&graph, &config));
145        assert_eq!(result.component_count, 2);
146        assert_eq!(result.components[0].members.len(), 2);
147        assert_eq!(result.components[1].members.len(), 2);
148    }
149
150    #[test]
151    fn isolated_node() {
152        let mut graph = Graph::new();
153        graph.add_node(make_node("a.md"));
154        graph.add_node(make_node("b.md"));
155        graph.add_node(make_node("c.md"));
156        graph.add_edge(make_edge("a.md", "b.md"));
157
158        let config = Config::defaults();
159        let result = ConnectedComponents.run(&make_ctx(&graph, &config));
160        assert_eq!(result.component_count, 2);
161        assert_eq!(result.components[0].members, vec!["a.md", "b.md"]);
162        assert_eq!(result.components[1].members, vec!["c.md"]);
163    }
164
165    #[test]
166    fn external_edges_excluded() {
167        let mut graph = Graph::new();
168        graph.add_node(make_node("a.md"));
169        graph.add_node(Node {
170            path: "https://example.com".into(),
171            node_type: Some(NodeType::Uri),
172            included: false,
173            hash: None,
174            metadata: HashMap::new(),
175        });
176        graph.add_edge(Edge {
177            source: "a.md".into(),
178            target: "https://example.com".into(),
179            link: None,
180            parser: "markdown".into(),
181        });
182
183        let config = Config::defaults();
184        let result = ConnectedComponents.run(&make_ctx(&graph, &config));
185        // Only included nodes appear in components — the URI is referenced, not included
186        assert_eq!(result.component_count, 1);
187        assert_eq!(result.components[0].members, vec!["a.md"]);
188    }
189
190    #[test]
191    fn empty_graph() {
192        let graph = Graph::new();
193        let config = Config::defaults();
194        let result = ConnectedComponents.run(&make_ctx(&graph, &config));
195        assert_eq!(result.component_count, 0);
196        assert!(result.components.is_empty());
197    }
198
199    #[test]
200    fn undirected_connectivity() {
201        let mut graph = Graph::new();
202        graph.add_node(make_node("a.md"));
203        graph.add_node(make_node("b.md"));
204        graph.add_node(make_node("c.md"));
205        graph.add_edge(make_edge("a.md", "b.md"));
206        graph.add_edge(make_edge("c.md", "b.md"));
207
208        let config = Config::defaults();
209        let result = ConnectedComponents.run(&make_ctx(&graph, &config));
210        assert_eq!(result.component_count, 1);
211        assert_eq!(result.components[0].members.len(), 3);
212    }
213
214    #[test]
215    fn sorted_by_size_descending() {
216        let mut graph = Graph::new();
217        graph.add_node(make_node("a.md"));
218        graph.add_node(make_node("b.md"));
219        graph.add_node(make_node("c.md"));
220        graph.add_node(make_node("d.md"));
221        graph.add_node(make_node("e.md"));
222        graph.add_edge(make_edge("a.md", "b.md"));
223        graph.add_edge(make_edge("a.md", "c.md"));
224
225        let config = Config::defaults();
226        let result = ConnectedComponents.run(&make_ctx(&graph, &config));
227        assert_eq!(result.component_count, 3);
228        assert_eq!(result.components[0].members.len(), 3);
229        assert_eq!(result.components[1].members.len(), 1);
230        assert_eq!(result.components[2].members.len(), 1);
231    }
232}