Skip to main content

drft/analyses/
scc.rs

1use super::{Analysis, AnalysisContext};
2use crate::graph::Graph;
3use std::collections::HashMap;
4
5#[derive(Debug, Clone, serde::Serialize)]
6pub struct Scc {
7    pub id: usize,
8    pub members: Vec<String>,
9}
10
11#[derive(Debug, Clone, serde::Serialize)]
12pub struct SccResult {
13    pub scc_count: usize,
14    pub nontrivial_count: usize,
15    pub sccs: Vec<Scc>,
16    pub node_scc: HashMap<String, usize>,
17}
18
19pub struct StronglyConnectedComponents;
20
21impl Analysis for StronglyConnectedComponents {
22    type Output = SccResult;
23
24    fn name(&self) -> &str {
25        "scc"
26    }
27
28    fn run(&self, ctx: &AnalysisContext) -> SccResult {
29        let graph = ctx.graph;
30        let real_nodes: Vec<&str> = graph
31            .nodes
32            .keys()
33            .filter(|p| graph.is_file_node(p))
34            .map(|s| s.as_str())
35            .collect();
36
37        let mut state = TarjanState {
38            graph,
39            index_counter: 0,
40            stack: Vec::new(),
41            on_stack: HashMap::new(),
42            index: HashMap::new(),
43            lowlink: HashMap::new(),
44            components: Vec::new(),
45        };
46
47        // Sort for deterministic output
48        let mut sorted_nodes = real_nodes;
49        sorted_nodes.sort();
50
51        for &node in &sorted_nodes {
52            if !state.index.contains_key(node) {
53                state.strongconnect(node);
54            }
55        }
56
57        // Build result
58        let all_components = state.components;
59        let scc_count = all_components.len();
60
61        // Check for self-loops to determine non-trivial single-node SCCs
62        let mut has_self_loop: HashMap<&str, bool> = HashMap::new();
63        for edge in &graph.edges {
64            if edge.source == edge.target && graph.is_file_node(&edge.source) {
65                has_self_loop.insert(edge.source.as_str(), true);
66            }
67        }
68
69        // Non-trivial SCCs: size > 1, or size == 1 with a self-loop
70        let nontrivial: Vec<Vec<String>> = all_components
71            .iter()
72            .filter(|c| c.len() > 1 || (c.len() == 1 && has_self_loop.contains_key(c[0].as_str())))
73            .cloned()
74            .collect();
75
76        let nontrivial_count = nontrivial.len();
77
78        // Assign SCC IDs to all nodes
79        let mut node_scc: HashMap<String, usize> = HashMap::new();
80        for (i, component) in all_components.iter().enumerate() {
81            for member in component {
82                node_scc.insert(member.clone(), i + 1);
83            }
84        }
85
86        // Build output SCCs (non-trivial only, sorted by size desc then first member)
87        let mut output_sccs = nontrivial;
88        output_sccs.sort_by(|a, b| b.len().cmp(&a.len()).then_with(|| a[0].cmp(&b[0])));
89
90        let sccs = output_sccs
91            .into_iter()
92            .enumerate()
93            .map(|(i, members)| Scc { id: i + 1, members })
94            .collect();
95
96        SccResult {
97            scc_count,
98            nontrivial_count,
99            sccs,
100            node_scc,
101        }
102    }
103}
104
105struct TarjanState<'a> {
106    graph: &'a Graph,
107    index_counter: usize,
108    stack: Vec<&'a str>,
109    on_stack: HashMap<&'a str, bool>,
110    index: HashMap<&'a str, usize>,
111    lowlink: HashMap<&'a str, usize>,
112    components: Vec<Vec<String>>,
113}
114
115impl<'a> TarjanState<'a> {
116    fn strongconnect(&mut self, v: &'a str) {
117        self.index.insert(v, self.index_counter);
118        self.lowlink.insert(v, self.index_counter);
119        self.index_counter += 1;
120        self.stack.push(v);
121        self.on_stack.insert(v, true);
122
123        // Visit successors
124        if let Some(edge_indices) = self.graph.forward.get(v) {
125            for &idx in edge_indices {
126                let w = self.graph.edges[idx].target.as_str();
127                if !self.graph.is_file_node(w) {
128                    continue;
129                }
130                if !self.index.contains_key(w) {
131                    self.strongconnect(w);
132                    let w_low = self.lowlink[w];
133                    let v_low = self.lowlink[v];
134                    if w_low < v_low {
135                        self.lowlink.insert(v, w_low);
136                    }
137                } else if self.on_stack.get(w) == Some(&true) {
138                    let w_idx = self.index[w];
139                    let v_low = self.lowlink[v];
140                    if w_idx < v_low {
141                        self.lowlink.insert(v, w_idx);
142                    }
143                }
144            }
145        }
146
147        // If v is a root node, pop the stack to form an SCC
148        if self.lowlink[v] == self.index[v] {
149            let mut component = Vec::new();
150            loop {
151                let w = self.stack.pop().unwrap();
152                self.on_stack.insert(w, false);
153                component.push(w.to_string());
154                if w == v {
155                    break;
156                }
157            }
158            component.sort();
159            self.components.push(component);
160        }
161    }
162}
163
164#[cfg(test)]
165mod tests {
166    use super::*;
167    use crate::analyses::AnalysisContext;
168    use crate::config::Config;
169    use crate::graph::Graph;
170    use crate::graph::test_helpers::{make_edge, make_node};
171    use std::path::Path;
172
173    fn make_ctx<'a>(graph: &'a Graph, config: &'a Config) -> AnalysisContext<'a> {
174        AnalysisContext {
175            graph,
176            root: Path::new("."),
177            config,
178            lockfile: None,
179        }
180    }
181
182    #[test]
183    fn dag_has_no_nontrivial_sccs() {
184        let mut graph = Graph::new();
185        graph.add_node(make_node("a.md"));
186        graph.add_node(make_node("b.md"));
187        graph.add_node(make_node("c.md"));
188        graph.add_edge(make_edge("a.md", "b.md"));
189        graph.add_edge(make_edge("b.md", "c.md"));
190
191        let config = Config::defaults();
192        let result = StronglyConnectedComponents.run(&make_ctx(&graph, &config));
193        assert_eq!(result.scc_count, 3);
194        assert_eq!(result.nontrivial_count, 0);
195        assert!(result.sccs.is_empty());
196    }
197
198    #[test]
199    fn simple_cycle() {
200        let mut graph = Graph::new();
201        graph.add_node(make_node("a.md"));
202        graph.add_node(make_node("b.md"));
203        graph.add_node(make_node("c.md"));
204        graph.add_edge(make_edge("a.md", "b.md"));
205        graph.add_edge(make_edge("b.md", "c.md"));
206        graph.add_edge(make_edge("c.md", "a.md"));
207
208        let config = Config::defaults();
209        let result = StronglyConnectedComponents.run(&make_ctx(&graph, &config));
210        assert_eq!(result.nontrivial_count, 1);
211        assert_eq!(result.sccs.len(), 1);
212        assert_eq!(result.sccs[0].members, vec!["a.md", "b.md", "c.md"]);
213    }
214
215    #[test]
216    fn two_separate_cycles() {
217        let mut graph = Graph::new();
218        graph.add_node(make_node("a.md"));
219        graph.add_node(make_node("b.md"));
220        graph.add_node(make_node("c.md"));
221        graph.add_node(make_node("d.md"));
222        graph.add_edge(make_edge("a.md", "b.md"));
223        graph.add_edge(make_edge("b.md", "a.md"));
224        graph.add_edge(make_edge("c.md", "d.md"));
225        graph.add_edge(make_edge("d.md", "c.md"));
226
227        let config = Config::defaults();
228        let result = StronglyConnectedComponents.run(&make_ctx(&graph, &config));
229        assert_eq!(result.nontrivial_count, 2);
230        assert_eq!(result.sccs.len(), 2);
231    }
232
233    #[test]
234    fn mixed_cyclic_and_acyclic() {
235        let mut graph = Graph::new();
236        graph.add_node(make_node("a.md"));
237        graph.add_node(make_node("b.md"));
238        graph.add_node(make_node("c.md"));
239        graph.add_edge(make_edge("a.md", "b.md"));
240        graph.add_edge(make_edge("b.md", "c.md"));
241        graph.add_edge(make_edge("c.md", "b.md"));
242
243        let config = Config::defaults();
244        let result = StronglyConnectedComponents.run(&make_ctx(&graph, &config));
245        assert_eq!(result.nontrivial_count, 1);
246        assert_eq!(result.sccs[0].members, vec!["b.md", "c.md"]);
247        assert!(result.node_scc.contains_key("a.md"));
248    }
249
250    #[test]
251    fn self_loop_is_nontrivial() {
252        let mut graph = Graph::new();
253        graph.add_node(make_node("a.md"));
254        graph.add_edge(make_edge("a.md", "a.md"));
255
256        let config = Config::defaults();
257        let result = StronglyConnectedComponents.run(&make_ctx(&graph, &config));
258        assert_eq!(result.nontrivial_count, 1);
259        assert_eq!(result.sccs[0].members, vec!["a.md"]);
260    }
261
262    #[test]
263    fn empty_graph() {
264        let graph = Graph::new();
265        let config = Config::defaults();
266        let result = StronglyConnectedComponents.run(&make_ctx(&graph, &config));
267        assert_eq!(result.scc_count, 0);
268        assert_eq!(result.nontrivial_count, 0);
269    }
270
271    #[test]
272    fn every_node_has_scc_id() {
273        let mut graph = Graph::new();
274        graph.add_node(make_node("a.md"));
275        graph.add_node(make_node("b.md"));
276        graph.add_node(make_node("c.md"));
277        graph.add_edge(make_edge("a.md", "b.md"));
278        graph.add_edge(make_edge("b.md", "c.md"));
279
280        let config = Config::defaults();
281        let result = StronglyConnectedComponents.run(&make_ctx(&graph, &config));
282        assert!(result.node_scc.contains_key("a.md"));
283        assert!(result.node_scc.contains_key("b.md"));
284        assert!(result.node_scc.contains_key("c.md"));
285    }
286}