Skip to main content

drft/analyses/
scc.rs

1use super::{Analysis, AnalysisContext};
2use crate::graph::Graph;
3use std::collections::HashMap;
4
5#[derive(Debug, Clone, serde::Serialize)]
6pub struct Scc {
7    pub id: usize,
8    pub members: Vec<String>,
9}
10
11#[derive(Debug, Clone, serde::Serialize)]
12pub struct SccResult {
13    pub scc_count: usize,
14    pub nontrivial_count: usize,
15    pub sccs: Vec<Scc>,
16    pub node_scc: HashMap<String, usize>,
17}
18
19pub struct StronglyConnectedComponents;
20
21impl Analysis for StronglyConnectedComponents {
22    type Output = SccResult;
23
24    fn name(&self) -> &str {
25        "scc"
26    }
27
28    fn run(&self, ctx: &AnalysisContext) -> SccResult {
29        let graph = ctx.graph;
30        let real_nodes: Vec<&str> = graph.included_nodes().map(|(s, _)| s.as_str()).collect();
31
32        let mut state = TarjanState {
33            graph,
34            index_counter: 0,
35            stack: Vec::new(),
36            on_stack: HashMap::new(),
37            index: HashMap::new(),
38            lowlink: HashMap::new(),
39            components: Vec::new(),
40        };
41
42        // Sort for deterministic output
43        let mut sorted_nodes = real_nodes;
44        sorted_nodes.sort();
45
46        for &node in &sorted_nodes {
47            if !state.index.contains_key(node) {
48                state.strongconnect(node);
49            }
50        }
51
52        // Build result
53        let all_components = state.components;
54        let scc_count = all_components.len();
55
56        // Check for self-loops to determine non-trivial single-node SCCs
57        let mut has_self_loop: HashMap<&str, bool> = HashMap::new();
58        for edge in &graph.edges {
59            if edge.source == edge.target && graph.is_internal_edge(edge) {
60                has_self_loop.insert(edge.source.as_str(), true);
61            }
62        }
63
64        // Non-trivial SCCs: size > 1, or size == 1 with a self-loop
65        let nontrivial: Vec<Vec<String>> = all_components
66            .iter()
67            .filter(|c| c.len() > 1 || (c.len() == 1 && has_self_loop.contains_key(c[0].as_str())))
68            .cloned()
69            .collect();
70
71        let nontrivial_count = nontrivial.len();
72
73        // Assign SCC IDs to all nodes
74        let mut node_scc: HashMap<String, usize> = HashMap::new();
75        for (i, component) in all_components.iter().enumerate() {
76            for member in component {
77                node_scc.insert(member.clone(), i + 1);
78            }
79        }
80
81        // Build output SCCs (non-trivial only, sorted by size desc then first member)
82        let mut output_sccs = nontrivial;
83        output_sccs.sort_by(|a, b| b.len().cmp(&a.len()).then_with(|| a[0].cmp(&b[0])));
84
85        let sccs = output_sccs
86            .into_iter()
87            .enumerate()
88            .map(|(i, members)| Scc { id: i + 1, members })
89            .collect();
90
91        SccResult {
92            scc_count,
93            nontrivial_count,
94            sccs,
95            node_scc,
96        }
97    }
98}
99
100struct TarjanState<'a> {
101    graph: &'a Graph,
102    index_counter: usize,
103    stack: Vec<&'a str>,
104    on_stack: HashMap<&'a str, bool>,
105    index: HashMap<&'a str, usize>,
106    lowlink: HashMap<&'a str, usize>,
107    components: Vec<Vec<String>>,
108}
109
110impl<'a> TarjanState<'a> {
111    fn strongconnect(&mut self, v: &'a str) {
112        self.index.insert(v, self.index_counter);
113        self.lowlink.insert(v, self.index_counter);
114        self.index_counter += 1;
115        self.stack.push(v);
116        self.on_stack.insert(v, true);
117
118        // Visit successors
119        if let Some(edge_indices) = self.graph.forward.get(v) {
120            for &idx in edge_indices {
121                let edge = &self.graph.edges[idx];
122                if !self.graph.is_internal_edge(edge) {
123                    continue;
124                }
125                let w = edge.target.as_str();
126                if !self.index.contains_key(w) {
127                    self.strongconnect(w);
128                    let w_low = self.lowlink[w];
129                    let v_low = self.lowlink[v];
130                    if w_low < v_low {
131                        self.lowlink.insert(v, w_low);
132                    }
133                } else if self.on_stack.get(w) == Some(&true) {
134                    let w_idx = self.index[w];
135                    let v_low = self.lowlink[v];
136                    if w_idx < v_low {
137                        self.lowlink.insert(v, w_idx);
138                    }
139                }
140            }
141        }
142
143        // If v is a root node, pop the stack to form an SCC
144        if self.lowlink[v] == self.index[v] {
145            let mut component = Vec::new();
146            loop {
147                let w = self.stack.pop().unwrap();
148                self.on_stack.insert(w, false);
149                component.push(w.to_string());
150                if w == v {
151                    break;
152                }
153            }
154            component.sort();
155            self.components.push(component);
156        }
157    }
158}
159
160#[cfg(test)]
161mod tests {
162    use super::*;
163    use crate::analyses::AnalysisContext;
164    use crate::config::Config;
165    use crate::graph::Graph;
166    use crate::graph::test_helpers::{make_edge, make_node};
167    use std::path::Path;
168
169    fn make_ctx<'a>(graph: &'a Graph, config: &'a Config) -> AnalysisContext<'a> {
170        AnalysisContext {
171            graph,
172            root: Path::new("."),
173            config,
174            lockfile: None,
175        }
176    }
177
178    #[test]
179    fn dag_has_no_nontrivial_sccs() {
180        let mut graph = Graph::new();
181        graph.add_node(make_node("a.md"));
182        graph.add_node(make_node("b.md"));
183        graph.add_node(make_node("c.md"));
184        graph.add_edge(make_edge("a.md", "b.md"));
185        graph.add_edge(make_edge("b.md", "c.md"));
186
187        let config = Config::defaults();
188        let result = StronglyConnectedComponents.run(&make_ctx(&graph, &config));
189        assert_eq!(result.scc_count, 3);
190        assert_eq!(result.nontrivial_count, 0);
191        assert!(result.sccs.is_empty());
192    }
193
194    #[test]
195    fn simple_cycle() {
196        let mut graph = Graph::new();
197        graph.add_node(make_node("a.md"));
198        graph.add_node(make_node("b.md"));
199        graph.add_node(make_node("c.md"));
200        graph.add_edge(make_edge("a.md", "b.md"));
201        graph.add_edge(make_edge("b.md", "c.md"));
202        graph.add_edge(make_edge("c.md", "a.md"));
203
204        let config = Config::defaults();
205        let result = StronglyConnectedComponents.run(&make_ctx(&graph, &config));
206        assert_eq!(result.nontrivial_count, 1);
207        assert_eq!(result.sccs.len(), 1);
208        assert_eq!(result.sccs[0].members, vec!["a.md", "b.md", "c.md"]);
209    }
210
211    #[test]
212    fn two_separate_cycles() {
213        let mut graph = Graph::new();
214        graph.add_node(make_node("a.md"));
215        graph.add_node(make_node("b.md"));
216        graph.add_node(make_node("c.md"));
217        graph.add_node(make_node("d.md"));
218        graph.add_edge(make_edge("a.md", "b.md"));
219        graph.add_edge(make_edge("b.md", "a.md"));
220        graph.add_edge(make_edge("c.md", "d.md"));
221        graph.add_edge(make_edge("d.md", "c.md"));
222
223        let config = Config::defaults();
224        let result = StronglyConnectedComponents.run(&make_ctx(&graph, &config));
225        assert_eq!(result.nontrivial_count, 2);
226        assert_eq!(result.sccs.len(), 2);
227    }
228
229    #[test]
230    fn mixed_cyclic_and_acyclic() {
231        let mut graph = Graph::new();
232        graph.add_node(make_node("a.md"));
233        graph.add_node(make_node("b.md"));
234        graph.add_node(make_node("c.md"));
235        graph.add_edge(make_edge("a.md", "b.md"));
236        graph.add_edge(make_edge("b.md", "c.md"));
237        graph.add_edge(make_edge("c.md", "b.md"));
238
239        let config = Config::defaults();
240        let result = StronglyConnectedComponents.run(&make_ctx(&graph, &config));
241        assert_eq!(result.nontrivial_count, 1);
242        assert_eq!(result.sccs[0].members, vec!["b.md", "c.md"]);
243        assert!(result.node_scc.contains_key("a.md"));
244    }
245
246    #[test]
247    fn self_loop_is_nontrivial() {
248        let mut graph = Graph::new();
249        graph.add_node(make_node("a.md"));
250        graph.add_edge(make_edge("a.md", "a.md"));
251
252        let config = Config::defaults();
253        let result = StronglyConnectedComponents.run(&make_ctx(&graph, &config));
254        assert_eq!(result.nontrivial_count, 1);
255        assert_eq!(result.sccs[0].members, vec!["a.md"]);
256    }
257
258    #[test]
259    fn empty_graph() {
260        let graph = Graph::new();
261        let config = Config::defaults();
262        let result = StronglyConnectedComponents.run(&make_ctx(&graph, &config));
263        assert_eq!(result.scc_count, 0);
264        assert_eq!(result.nontrivial_count, 0);
265    }
266
267    #[test]
268    fn every_node_has_scc_id() {
269        let mut graph = Graph::new();
270        graph.add_node(make_node("a.md"));
271        graph.add_node(make_node("b.md"));
272        graph.add_node(make_node("c.md"));
273        graph.add_edge(make_edge("a.md", "b.md"));
274        graph.add_edge(make_edge("b.md", "c.md"));
275
276        let config = Config::defaults();
277        let result = StronglyConnectedComponents.run(&make_ctx(&graph, &config));
278        assert!(result.node_scc.contains_key("a.md"));
279        assert!(result.node_scc.contains_key("b.md"));
280        assert!(result.node_scc.contains_key("c.md"));
281    }
282}