Skip to main content

fallow_graph/graph/
cycles.rs

1//! Circular dependency detection via Tarjan's SCC algorithm + elementary cycle enumeration.
2#![expect(clippy::excessive_nesting)]
3
4use std::ops::Range;
5
6use fixedbitset::FixedBitSet;
7use rustc_hash::FxHashSet;
8
9use fallow_types::discover::FileId;
10
11use super::ModuleGraph;
12use super::types::ModuleNode;
13
14impl ModuleGraph {
15    /// Find all circular dependency cycles in the module graph.
16    ///
17    /// Uses an iterative implementation of Tarjan's strongly connected components
18    /// algorithm (O(V + E)) to find all SCCs with 2 or more nodes. Each such SCC
19    /// represents a set of files involved in a circular dependency.
20    ///
21    /// Returns cycles sorted by length (shortest first), with files within each
22    /// cycle sorted by path for deterministic output.
23    pub fn find_cycles(&self) -> Vec<Vec<FileId>> {
24        let n = self.modules.len();
25        if n == 0 {
26            return Vec::new();
27        }
28
29        // Tarjan's SCC state
30        let mut index_counter: u32 = 0;
31        let mut indices: Vec<u32> = vec![u32::MAX; n]; // u32::MAX = undefined
32        let mut lowlinks: Vec<u32> = vec![0; n];
33        let mut on_stack = FixedBitSet::with_capacity(n);
34        let mut stack: Vec<usize> = Vec::new();
35        let mut sccs: Vec<Vec<FileId>> = Vec::new();
36
37        // Iterative DFS stack frame
38        struct Frame {
39            node: usize,
40            succ_pos: usize,
41            succ_end: usize,
42        }
43
44        // Pre-collect all successors (deduplicated) into a flat vec for cache-friendly access.
45        let mut all_succs: Vec<usize> = Vec::with_capacity(self.edges.len());
46        let mut succ_ranges: Vec<Range<usize>> = Vec::with_capacity(n);
47        let mut seen_set = FxHashSet::default();
48        for module in &self.modules {
49            let start = all_succs.len();
50            seen_set.clear();
51            for edge in &self.edges[module.edge_range.clone()] {
52                let target = edge.target.0 as usize;
53                if target < n && seen_set.insert(target) {
54                    all_succs.push(target);
55                }
56            }
57            let end = all_succs.len();
58            succ_ranges.push(start..end);
59        }
60
61        let mut dfs_stack: Vec<Frame> = Vec::new();
62
63        for start_node in 0..n {
64            if indices[start_node] != u32::MAX {
65                continue;
66            }
67
68            // Push the starting node
69            indices[start_node] = index_counter;
70            lowlinks[start_node] = index_counter;
71            index_counter += 1;
72            on_stack.insert(start_node);
73            stack.push(start_node);
74
75            let range = &succ_ranges[start_node];
76            dfs_stack.push(Frame {
77                node: start_node,
78                succ_pos: range.start,
79                succ_end: range.end,
80            });
81
82            while let Some(frame) = dfs_stack.last_mut() {
83                if frame.succ_pos < frame.succ_end {
84                    let w = all_succs[frame.succ_pos];
85                    frame.succ_pos += 1;
86
87                    if indices[w] == u32::MAX {
88                        // Tree edge: push w onto the DFS stack
89                        indices[w] = index_counter;
90                        lowlinks[w] = index_counter;
91                        index_counter += 1;
92                        on_stack.insert(w);
93                        stack.push(w);
94
95                        let range = &succ_ranges[w];
96                        dfs_stack.push(Frame {
97                            node: w,
98                            succ_pos: range.start,
99                            succ_end: range.end,
100                        });
101                    } else if on_stack.contains(w) {
102                        // Back edge: update lowlink
103                        let v = frame.node;
104                        lowlinks[v] = lowlinks[v].min(indices[w]);
105                    }
106                } else {
107                    // All successors processed — pop this frame
108                    let v = frame.node;
109                    let v_lowlink = lowlinks[v];
110                    let v_index = indices[v];
111                    dfs_stack.pop();
112
113                    // Update parent's lowlink
114                    if let Some(parent) = dfs_stack.last_mut() {
115                        lowlinks[parent.node] = lowlinks[parent.node].min(v_lowlink);
116                    }
117
118                    // If v is a root node, pop the SCC
119                    if v_lowlink == v_index {
120                        let mut scc = Vec::new();
121                        loop {
122                            let w = stack.pop().expect("SCC stack should not be empty");
123                            on_stack.set(w, false);
124                            scc.push(FileId(w as u32));
125                            if w == v {
126                                break;
127                            }
128                        }
129                        // Only report cycles of length >= 2
130                        if scc.len() >= 2 {
131                            sccs.push(scc);
132                        }
133                    }
134                }
135            }
136        }
137
138        // Phase 2: Enumerate individual elementary cycles within each SCC.
139        // For small SCCs (len == 2), there's exactly one cycle.
140        // For larger SCCs, use bounded DFS to find up to MAX_CYCLES_PER_SCC cycles.
141        const MAX_CYCLES_PER_SCC: usize = 20;
142
143        let mut result: Vec<Vec<FileId>> = Vec::new();
144        let mut seen_cycles: FxHashSet<Vec<u32>> = FxHashSet::default();
145
146        for scc in &sccs {
147            if scc.len() == 2 {
148                let mut cycle = vec![scc[0].0 as usize, scc[1].0 as usize];
149                // Canonical: smallest path first
150                if self.modules[cycle[1]].path < self.modules[cycle[0]].path {
151                    cycle.swap(0, 1);
152                }
153                let key: Vec<u32> = cycle.iter().map(|&n| n as u32).collect();
154                if seen_cycles.insert(key) {
155                    result.push(cycle.into_iter().map(|n| FileId(n as u32)).collect());
156                }
157                continue;
158            }
159
160            let scc_nodes: Vec<usize> = scc.iter().map(|id| id.0 as usize).collect();
161            let elementary = enumerate_elementary_cycles(
162                &scc_nodes,
163                &all_succs,
164                &succ_ranges,
165                MAX_CYCLES_PER_SCC,
166                &self.modules,
167            );
168
169            for cycle in elementary {
170                let key: Vec<u32> = cycle.iter().map(|&n| n as u32).collect();
171                if seen_cycles.insert(key) {
172                    result.push(cycle.into_iter().map(|n| FileId(n as u32)).collect());
173                }
174            }
175        }
176
177        // Sort: shortest first, then by first file path
178        result.sort_by(|a, b| {
179            a.len().cmp(&b.len()).then_with(|| {
180                self.modules[a[0].0 as usize]
181                    .path
182                    .cmp(&self.modules[b[0].0 as usize].path)
183            })
184        });
185
186        result
187    }
188}
189
190/// Rotate a cycle so the node with the smallest path is first (canonical form for dedup).
191fn canonical_cycle(cycle: &[usize], modules: &[ModuleNode]) -> Vec<usize> {
192    if cycle.is_empty() {
193        return Vec::new();
194    }
195    let min_pos = cycle
196        .iter()
197        .enumerate()
198        .min_by(|(_, a), (_, b)| modules[**a].path.cmp(&modules[**b].path))
199        .map_or(0, |(i, _)| i);
200    let mut result = cycle[min_pos..].to_vec();
201    result.extend_from_slice(&cycle[..min_pos]);
202    result
203}
204
205/// Enumerate individual elementary cycles within an SCC using depth-limited DFS.
206///
207/// Uses iterative deepening: first finds all 2-node cycles, then 3-node, etc.
208/// This ensures the shortest, most actionable cycles are always found first.
209/// Stops after `max_cycles` total cycles to bound work on dense SCCs.
210fn enumerate_elementary_cycles(
211    scc_nodes: &[usize],
212    all_succs: &[usize],
213    succ_ranges: &[Range<usize>],
214    max_cycles: usize,
215    modules: &[ModuleNode],
216) -> Vec<Vec<usize>> {
217    let scc_set: FxHashSet<usize> = scc_nodes.iter().copied().collect();
218    let mut cycles: Vec<Vec<usize>> = Vec::new();
219    let mut seen: FxHashSet<Vec<u32>> = FxHashSet::default();
220
221    // Sort start nodes by path for deterministic enumeration order
222    let mut sorted_nodes: Vec<usize> = scc_nodes.to_vec();
223    sorted_nodes.sort_by(|a, b| modules[*a].path.cmp(&modules[*b].path));
224
225    // DFS frame for iterative cycle finding
226    struct CycleFrame {
227        succ_pos: usize,
228        succ_end: usize,
229    }
230
231    // Iterative deepening: increase max depth from 2 up to SCC size
232    let max_depth = scc_nodes.len().min(12); // Cap depth to avoid very long cycles
233    for depth_limit in 2..=max_depth {
234        if cycles.len() >= max_cycles {
235            break;
236        }
237
238        for &start in &sorted_nodes {
239            if cycles.len() >= max_cycles {
240                break;
241            }
242
243            let mut path: Vec<usize> = vec![start];
244            let mut path_set = FixedBitSet::with_capacity(modules.len());
245            path_set.insert(start);
246
247            let range = &succ_ranges[start];
248            let mut dfs: Vec<CycleFrame> = vec![CycleFrame {
249                succ_pos: range.start,
250                succ_end: range.end,
251            }];
252
253            while let Some(frame) = dfs.last_mut() {
254                if cycles.len() >= max_cycles {
255                    break;
256                }
257
258                if frame.succ_pos >= frame.succ_end {
259                    // Backtrack
260                    dfs.pop();
261                    if path.len() > 1 {
262                        let last = path.pop().unwrap();
263                        path_set.set(last, false);
264                    }
265                    continue;
266                }
267
268                let w = all_succs[frame.succ_pos];
269                frame.succ_pos += 1;
270
271                // Only follow edges within this SCC
272                if !scc_set.contains(&w) {
273                    continue;
274                }
275
276                if w == start && path.len() >= 2 && path.len() == depth_limit {
277                    // Found an elementary cycle at exactly this depth
278                    let canonical = canonical_cycle(&path, modules);
279                    let key: Vec<u32> = canonical.iter().map(|&n| n as u32).collect();
280                    if seen.insert(key) {
281                        cycles.push(canonical);
282                    }
283                } else if !path_set.contains(w) && path.len() < depth_limit {
284                    // Extend path (only if within depth limit)
285                    path.push(w);
286                    path_set.insert(w);
287
288                    let range = &succ_ranges[w];
289                    dfs.push(CycleFrame {
290                        succ_pos: range.start,
291                        succ_end: range.end,
292                    });
293                }
294            }
295        }
296    }
297
298    cycles
299}
300
301#[cfg(test)]
302mod tests {
303    use std::path::PathBuf;
304
305    use crate::resolve::{ResolveResult, ResolvedImport, ResolvedModule};
306    use fallow_types::discover::{DiscoveredFile, EntryPoint, EntryPointSource, FileId};
307    use fallow_types::extract::{ExportName, ImportInfo, ImportedName};
308
309    use super::ModuleGraph;
310
311    /// Helper: build a graph from files+edges, no entry points needed for cycle detection.
312    fn build_cycle_graph(file_count: usize, edges_spec: &[(u32, u32)]) -> ModuleGraph {
313        let files: Vec<DiscoveredFile> = (0..file_count)
314            .map(|i| DiscoveredFile {
315                id: FileId(i as u32),
316                path: PathBuf::from(format!("/project/file{i}.ts")),
317                size_bytes: 100,
318            })
319            .collect();
320
321        let resolved_modules: Vec<ResolvedModule> = (0..file_count)
322            .map(|i| {
323                let imports: Vec<ResolvedImport> = edges_spec
324                    .iter()
325                    .filter(|(src, _)| *src == i as u32)
326                    .map(|(_, tgt)| ResolvedImport {
327                        info: ImportInfo {
328                            source: format!("./file{tgt}"),
329                            imported_name: ImportedName::Named("x".to_string()),
330                            local_name: "x".to_string(),
331                            is_type_only: false,
332                            span: oxc_span::Span::new(0, 10),
333                        },
334                        target: ResolveResult::InternalModule(FileId(*tgt)),
335                    })
336                    .collect();
337
338                ResolvedModule {
339                    file_id: FileId(i as u32),
340                    path: PathBuf::from(format!("/project/file{i}.ts")),
341                    exports: vec![fallow_types::extract::ExportInfo {
342                        name: ExportName::Named("x".to_string()),
343                        local_name: Some("x".to_string()),
344                        is_type_only: false,
345                        is_public: false,
346                        span: oxc_span::Span::new(0, 20),
347                        members: vec![],
348                    }],
349                    re_exports: vec![],
350                    resolved_imports: imports,
351                    resolved_dynamic_imports: vec![],
352                    resolved_dynamic_patterns: vec![],
353                    member_accesses: vec![],
354                    whole_object_uses: vec![],
355                    has_cjs_exports: false,
356                    unused_import_bindings: vec![],
357                }
358            })
359            .collect();
360
361        let entry_points = vec![EntryPoint {
362            path: PathBuf::from("/project/file0.ts"),
363            source: EntryPointSource::PackageJsonMain,
364        }];
365
366        ModuleGraph::build(&resolved_modules, &entry_points, &files)
367    }
368
369    #[test]
370    fn find_cycles_empty_graph() {
371        let graph = ModuleGraph::build(&[], &[], &[]);
372        assert!(graph.find_cycles().is_empty());
373    }
374
375    #[test]
376    fn find_cycles_no_cycles() {
377        // A -> B -> C (no back edges)
378        let graph = build_cycle_graph(3, &[(0, 1), (1, 2)]);
379        assert!(graph.find_cycles().is_empty());
380    }
381
382    #[test]
383    fn find_cycles_simple_two_node_cycle() {
384        // A -> B -> A
385        let graph = build_cycle_graph(2, &[(0, 1), (1, 0)]);
386        let cycles = graph.find_cycles();
387        assert_eq!(cycles.len(), 1);
388        assert_eq!(cycles[0].len(), 2);
389    }
390
391    #[test]
392    fn find_cycles_three_node_cycle() {
393        // A -> B -> C -> A
394        let graph = build_cycle_graph(3, &[(0, 1), (1, 2), (2, 0)]);
395        let cycles = graph.find_cycles();
396        assert_eq!(cycles.len(), 1);
397        assert_eq!(cycles[0].len(), 3);
398    }
399
400    #[test]
401    fn find_cycles_self_import_ignored() {
402        // A -> A (self-import, should NOT be reported)
403        let graph = build_cycle_graph(1, &[(0, 0)]);
404        let cycles = graph.find_cycles();
405        assert!(
406            cycles.is_empty(),
407            "self-imports should not be reported as cycles"
408        );
409    }
410
411    #[test]
412    fn find_cycles_multiple_independent_cycles() {
413        // Cycle 1: A -> B -> A
414        // Cycle 2: C -> D -> C
415        // No connection between cycles
416        let graph = build_cycle_graph(4, &[(0, 1), (1, 0), (2, 3), (3, 2)]);
417        let cycles = graph.find_cycles();
418        assert_eq!(cycles.len(), 2);
419        // Both cycles should have length 2
420        assert!(cycles.iter().all(|c| c.len() == 2));
421    }
422
423    #[test]
424    fn find_cycles_linear_chain_with_back_edge() {
425        // A -> B -> C -> D -> B (cycle is B-C-D)
426        let graph = build_cycle_graph(4, &[(0, 1), (1, 2), (2, 3), (3, 1)]);
427        let cycles = graph.find_cycles();
428        assert_eq!(cycles.len(), 1);
429        assert_eq!(cycles[0].len(), 3);
430        // The cycle should contain files 1, 2, 3
431        let ids: Vec<u32> = cycles[0].iter().map(|f| f.0).collect();
432        assert!(ids.contains(&1));
433        assert!(ids.contains(&2));
434        assert!(ids.contains(&3));
435        assert!(!ids.contains(&0));
436    }
437
438    #[test]
439    fn find_cycles_overlapping_cycles_enumerated() {
440        // A -> B -> A, B -> C -> B => SCC is {A, B, C} but should report 2 elementary cycles
441        let graph = build_cycle_graph(3, &[(0, 1), (1, 0), (1, 2), (2, 1)]);
442        let cycles = graph.find_cycles();
443        assert_eq!(
444            cycles.len(),
445            2,
446            "should find 2 elementary cycles, not 1 SCC"
447        );
448        assert!(
449            cycles.iter().all(|c| c.len() == 2),
450            "both cycles should have length 2"
451        );
452    }
453
454    #[test]
455    fn find_cycles_deterministic_ordering() {
456        // Run twice with the same graph — results should be identical
457        let graph1 = build_cycle_graph(3, &[(0, 1), (1, 2), (2, 0)]);
458        let graph2 = build_cycle_graph(3, &[(0, 1), (1, 2), (2, 0)]);
459        let cycles1 = graph1.find_cycles();
460        let cycles2 = graph2.find_cycles();
461        assert_eq!(cycles1.len(), cycles2.len());
462        for (c1, c2) in cycles1.iter().zip(cycles2.iter()) {
463            let paths1: Vec<&PathBuf> = c1
464                .iter()
465                .map(|f| &graph1.modules[f.0 as usize].path)
466                .collect();
467            let paths2: Vec<&PathBuf> = c2
468                .iter()
469                .map(|f| &graph2.modules[f.0 as usize].path)
470                .collect();
471            assert_eq!(paths1, paths2);
472        }
473    }
474
475    #[test]
476    fn find_cycles_sorted_by_length() {
477        // Two cycles: A-B (len 2) and C-D-E (len 3)
478        let graph = build_cycle_graph(5, &[(0, 1), (1, 0), (2, 3), (3, 4), (4, 2)]);
479        let cycles = graph.find_cycles();
480        assert_eq!(cycles.len(), 2);
481        assert!(
482            cycles[0].len() <= cycles[1].len(),
483            "cycles should be sorted by length"
484        );
485    }
486
487    #[test]
488    fn find_cycles_large_cycle() {
489        // Chain of 10 nodes forming a single cycle: 0->1->2->...->9->0
490        let edges: Vec<(u32, u32)> = (0..10).map(|i| (i, (i + 1) % 10)).collect();
491        let graph = build_cycle_graph(10, &edges);
492        let cycles = graph.find_cycles();
493        assert_eq!(cycles.len(), 1);
494        assert_eq!(cycles[0].len(), 10);
495    }
496
497    #[test]
498    fn find_cycles_complex_scc_multiple_elementary() {
499        // A square: A->B, B->C, C->D, D->A, plus diagonal A->C
500        // Elementary cycles: A->B->C->D->A, A->C->D->A, and A->B->C->...
501        let graph = build_cycle_graph(4, &[(0, 1), (1, 2), (2, 3), (3, 0), (0, 2)]);
502        let cycles = graph.find_cycles();
503        // Should find multiple elementary cycles, not just one SCC of 4
504        assert!(
505            cycles.len() >= 2,
506            "should find at least 2 elementary cycles, got {}",
507            cycles.len()
508        );
509        // All cycles should be shorter than the full SCC
510        assert!(cycles.iter().all(|c| c.len() <= 4));
511    }
512
513    #[test]
514    fn find_cycles_no_duplicate_cycles() {
515        // Triangle: A->B->C->A — should find exactly 1 cycle, not duplicates
516        // from different DFS start points
517        let graph = build_cycle_graph(3, &[(0, 1), (1, 2), (2, 0)]);
518        let cycles = graph.find_cycles();
519        assert_eq!(cycles.len(), 1, "triangle should produce exactly 1 cycle");
520        assert_eq!(cycles[0].len(), 3);
521    }
522}