Skip to main content

fallow_graph/graph/
cycles.rs

1//! Circular dependency detection via Tarjan's SCC algorithm + elementary cycle enumeration.
2
3use std::ops::Range;
4
5use fixedbitset::FixedBitSet;
6use rustc_hash::FxHashSet;
7
8use fallow_types::discover::FileId;
9
10use super::ModuleGraph;
11use super::types::ModuleNode;
12
13impl ModuleGraph {
14    /// Find all circular dependency cycles in the module graph.
15    ///
16    /// Uses an iterative implementation of Tarjan's strongly connected components
17    /// algorithm (O(V + E)) to find all SCCs with 2 or more nodes. Each such SCC
18    /// represents a set of files involved in a circular dependency.
19    ///
20    /// Returns cycles sorted by length (shortest first), with files within each
21    /// cycle sorted by path for deterministic output.
22    pub fn find_cycles(&self) -> Vec<Vec<FileId>> {
23        let n = self.modules.len();
24        if n == 0 {
25            return Vec::new();
26        }
27
28        // Tarjan's SCC state
29        let mut index_counter: u32 = 0;
30        let mut indices: Vec<u32> = vec![u32::MAX; n]; // u32::MAX = undefined
31        let mut lowlinks: Vec<u32> = vec![0; n];
32        let mut on_stack = FixedBitSet::with_capacity(n);
33        let mut stack: Vec<usize> = Vec::new();
34        let mut sccs: Vec<Vec<FileId>> = Vec::new();
35
36        // Iterative DFS stack frame
37        struct Frame {
38            node: usize,
39            succ_pos: usize,
40            succ_end: usize,
41        }
42
43        // Pre-collect all successors (deduplicated) into a flat vec for cache-friendly access.
44        let mut all_succs: Vec<usize> = Vec::with_capacity(self.edges.len());
45        let mut succ_ranges: Vec<Range<usize>> = Vec::with_capacity(n);
46        let mut seen_set = FxHashSet::default();
47        for module in &self.modules {
48            let start = all_succs.len();
49            seen_set.clear();
50            for edge in &self.edges[module.edge_range.clone()] {
51                let target = edge.target.0 as usize;
52                if target < n && seen_set.insert(target) {
53                    all_succs.push(target);
54                }
55            }
56            let end = all_succs.len();
57            succ_ranges.push(start..end);
58        }
59
60        let mut dfs_stack: Vec<Frame> = Vec::new();
61
62        for start_node in 0..n {
63            if indices[start_node] != u32::MAX {
64                continue;
65            }
66
67            // Push the starting node
68            indices[start_node] = index_counter;
69            lowlinks[start_node] = index_counter;
70            index_counter += 1;
71            on_stack.insert(start_node);
72            stack.push(start_node);
73
74            let range = &succ_ranges[start_node];
75            dfs_stack.push(Frame {
76                node: start_node,
77                succ_pos: range.start,
78                succ_end: range.end,
79            });
80
81            while let Some(frame) = dfs_stack.last_mut() {
82                if frame.succ_pos < frame.succ_end {
83                    let w = all_succs[frame.succ_pos];
84                    frame.succ_pos += 1;
85
86                    if indices[w] == u32::MAX {
87                        // Tree edge: push w onto the DFS stack
88                        indices[w] = index_counter;
89                        lowlinks[w] = index_counter;
90                        index_counter += 1;
91                        on_stack.insert(w);
92                        stack.push(w);
93
94                        let range = &succ_ranges[w];
95                        dfs_stack.push(Frame {
96                            node: w,
97                            succ_pos: range.start,
98                            succ_end: range.end,
99                        });
100                    } else if on_stack.contains(w) {
101                        // Back edge: update lowlink
102                        let v = frame.node;
103                        lowlinks[v] = lowlinks[v].min(indices[w]);
104                    }
105                } else {
106                    // All successors processed — pop this frame
107                    let v = frame.node;
108                    let v_lowlink = lowlinks[v];
109                    let v_index = indices[v];
110                    dfs_stack.pop();
111
112                    // Update parent's lowlink
113                    if let Some(parent) = dfs_stack.last_mut() {
114                        lowlinks[parent.node] = lowlinks[parent.node].min(v_lowlink);
115                    }
116
117                    // If v is a root node, pop the SCC
118                    if v_lowlink == v_index {
119                        let mut scc = Vec::new();
120                        loop {
121                            let w = stack.pop().expect("SCC stack should not be empty");
122                            on_stack.set(w, false);
123                            scc.push(FileId(w as u32));
124                            if w == v {
125                                break;
126                            }
127                        }
128                        // Only report cycles of length >= 2
129                        if scc.len() >= 2 {
130                            sccs.push(scc);
131                        }
132                    }
133                }
134            }
135        }
136
137        // Phase 2: Enumerate individual elementary cycles within each SCC.
138        // For small SCCs (len == 2), there's exactly one cycle.
139        // For larger SCCs, use bounded DFS to find up to MAX_CYCLES_PER_SCC cycles.
140        const MAX_CYCLES_PER_SCC: usize = 20;
141
142        let mut result: Vec<Vec<FileId>> = Vec::new();
143        let mut seen_cycles: FxHashSet<Vec<u32>> = FxHashSet::default();
144
145        for scc in &sccs {
146            if scc.len() == 2 {
147                let mut cycle = vec![scc[0].0 as usize, scc[1].0 as usize];
148                // Canonical: smallest path first
149                if self.modules[cycle[1]].path < self.modules[cycle[0]].path {
150                    cycle.swap(0, 1);
151                }
152                let key: Vec<u32> = cycle.iter().map(|&n| n as u32).collect();
153                if seen_cycles.insert(key) {
154                    result.push(cycle.into_iter().map(|n| FileId(n as u32)).collect());
155                }
156                continue;
157            }
158
159            let scc_nodes: Vec<usize> = scc.iter().map(|id| id.0 as usize).collect();
160            let elementary = enumerate_elementary_cycles(
161                &scc_nodes,
162                &all_succs,
163                &succ_ranges,
164                MAX_CYCLES_PER_SCC,
165                &self.modules,
166            );
167
168            for cycle in elementary {
169                let key: Vec<u32> = cycle.iter().map(|&n| n as u32).collect();
170                if seen_cycles.insert(key) {
171                    result.push(cycle.into_iter().map(|n| FileId(n as u32)).collect());
172                }
173            }
174        }
175
176        // Sort: shortest first, then by first file path
177        result.sort_by(|a, b| {
178            a.len().cmp(&b.len()).then_with(|| {
179                self.modules[a[0].0 as usize]
180                    .path
181                    .cmp(&self.modules[b[0].0 as usize].path)
182            })
183        });
184
185        result
186    }
187}
188
189/// Rotate a cycle so the node with the smallest path is first (canonical form for dedup).
190fn canonical_cycle(cycle: &[usize], modules: &[ModuleNode]) -> Vec<usize> {
191    if cycle.is_empty() {
192        return Vec::new();
193    }
194    let min_pos = cycle
195        .iter()
196        .enumerate()
197        .min_by(|(_, a), (_, b)| modules[**a].path.cmp(&modules[**b].path))
198        .map_or(0, |(i, _)| i);
199    let mut result = cycle[min_pos..].to_vec();
200    result.extend_from_slice(&cycle[..min_pos]);
201    result
202}
203
204/// Enumerate individual elementary cycles within an SCC using depth-limited DFS.
205///
206/// Uses iterative deepening: first finds all 2-node cycles, then 3-node, etc.
207/// This ensures the shortest, most actionable cycles are always found first.
208/// Stops after `max_cycles` total cycles to bound work on dense SCCs.
209fn enumerate_elementary_cycles(
210    scc_nodes: &[usize],
211    all_succs: &[usize],
212    succ_ranges: &[Range<usize>],
213    max_cycles: usize,
214    modules: &[ModuleNode],
215) -> Vec<Vec<usize>> {
216    let scc_set: FxHashSet<usize> = scc_nodes.iter().copied().collect();
217    let mut cycles: Vec<Vec<usize>> = Vec::new();
218    let mut seen: FxHashSet<Vec<u32>> = FxHashSet::default();
219
220    // Sort start nodes by path for deterministic enumeration order
221    let mut sorted_nodes: Vec<usize> = scc_nodes.to_vec();
222    sorted_nodes.sort_by(|a, b| modules[*a].path.cmp(&modules[*b].path));
223
224    // DFS frame for iterative cycle finding
225    struct CycleFrame {
226        succ_pos: usize,
227        succ_end: usize,
228    }
229
230    // Iterative deepening: increase max depth from 2 up to SCC size
231    let max_depth = scc_nodes.len().min(12); // Cap depth to avoid very long cycles
232    for depth_limit in 2..=max_depth {
233        if cycles.len() >= max_cycles {
234            break;
235        }
236
237        for &start in &sorted_nodes {
238            if cycles.len() >= max_cycles {
239                break;
240            }
241
242            let mut path: Vec<usize> = vec![start];
243            let mut path_set = FixedBitSet::with_capacity(modules.len());
244            path_set.insert(start);
245
246            let range = &succ_ranges[start];
247            let mut dfs: Vec<CycleFrame> = vec![CycleFrame {
248                succ_pos: range.start,
249                succ_end: range.end,
250            }];
251
252            while let Some(frame) = dfs.last_mut() {
253                if cycles.len() >= max_cycles {
254                    break;
255                }
256
257                if frame.succ_pos >= frame.succ_end {
258                    // Backtrack
259                    dfs.pop();
260                    if path.len() > 1 {
261                        let last = path.pop().unwrap();
262                        path_set.set(last, false);
263                    }
264                    continue;
265                }
266
267                let w = all_succs[frame.succ_pos];
268                frame.succ_pos += 1;
269
270                // Only follow edges within this SCC
271                if !scc_set.contains(&w) {
272                    continue;
273                }
274
275                if w == start && path.len() >= 2 && path.len() == depth_limit {
276                    // Found an elementary cycle at exactly this depth
277                    let canonical = canonical_cycle(&path, modules);
278                    let key: Vec<u32> = canonical.iter().map(|&n| n as u32).collect();
279                    if seen.insert(key) {
280                        cycles.push(canonical);
281                    }
282                } else if !path_set.contains(w) && path.len() < depth_limit {
283                    // Extend path (only if within depth limit)
284                    path.push(w);
285                    path_set.insert(w);
286
287                    let range = &succ_ranges[w];
288                    dfs.push(CycleFrame {
289                        succ_pos: range.start,
290                        succ_end: range.end,
291                    });
292                }
293            }
294        }
295    }
296
297    cycles
298}
299
300#[cfg(test)]
301mod tests {
302    use std::path::PathBuf;
303
304    use crate::resolve::{ResolveResult, ResolvedImport, ResolvedModule};
305    use fallow_types::discover::{DiscoveredFile, EntryPoint, EntryPointSource, FileId};
306    use fallow_types::extract::{ExportName, ImportInfo, ImportedName};
307
308    use super::ModuleGraph;
309
310    /// Helper: build a graph from files+edges, no entry points needed for cycle detection.
311    fn build_cycle_graph(file_count: usize, edges_spec: &[(u32, u32)]) -> ModuleGraph {
312        let files: Vec<DiscoveredFile> = (0..file_count)
313            .map(|i| DiscoveredFile {
314                id: FileId(i as u32),
315                path: PathBuf::from(format!("/project/file{i}.ts")),
316                size_bytes: 100,
317            })
318            .collect();
319
320        let resolved_modules: Vec<ResolvedModule> = (0..file_count)
321            .map(|i| {
322                let imports: Vec<ResolvedImport> = edges_spec
323                    .iter()
324                    .filter(|(src, _)| *src == i as u32)
325                    .map(|(_, tgt)| ResolvedImport {
326                        info: ImportInfo {
327                            source: format!("./file{tgt}"),
328                            imported_name: ImportedName::Named("x".to_string()),
329                            local_name: "x".to_string(),
330                            is_type_only: false,
331                            span: oxc_span::Span::new(0, 10),
332                        },
333                        target: ResolveResult::InternalModule(FileId(*tgt)),
334                    })
335                    .collect();
336
337                ResolvedModule {
338                    file_id: FileId(i as u32),
339                    path: PathBuf::from(format!("/project/file{i}.ts")),
340                    exports: vec![fallow_types::extract::ExportInfo {
341                        name: ExportName::Named("x".to_string()),
342                        local_name: Some("x".to_string()),
343                        is_type_only: false,
344                        span: oxc_span::Span::new(0, 20),
345                        members: vec![],
346                    }],
347                    re_exports: vec![],
348                    resolved_imports: imports,
349                    resolved_dynamic_imports: vec![],
350                    resolved_dynamic_patterns: vec![],
351                    member_accesses: vec![],
352                    whole_object_uses: vec![],
353                    has_cjs_exports: false,
354                }
355            })
356            .collect();
357
358        let entry_points = vec![EntryPoint {
359            path: PathBuf::from("/project/file0.ts"),
360            source: EntryPointSource::PackageJsonMain,
361        }];
362
363        ModuleGraph::build(&resolved_modules, &entry_points, &files)
364    }
365
366    #[test]
367    fn find_cycles_empty_graph() {
368        let graph = ModuleGraph::build(&[], &[], &[]);
369        assert!(graph.find_cycles().is_empty());
370    }
371
372    #[test]
373    fn find_cycles_no_cycles() {
374        // A -> B -> C (no back edges)
375        let graph = build_cycle_graph(3, &[(0, 1), (1, 2)]);
376        assert!(graph.find_cycles().is_empty());
377    }
378
379    #[test]
380    fn find_cycles_simple_two_node_cycle() {
381        // A -> B -> A
382        let graph = build_cycle_graph(2, &[(0, 1), (1, 0)]);
383        let cycles = graph.find_cycles();
384        assert_eq!(cycles.len(), 1);
385        assert_eq!(cycles[0].len(), 2);
386    }
387
388    #[test]
389    fn find_cycles_three_node_cycle() {
390        // A -> B -> C -> A
391        let graph = build_cycle_graph(3, &[(0, 1), (1, 2), (2, 0)]);
392        let cycles = graph.find_cycles();
393        assert_eq!(cycles.len(), 1);
394        assert_eq!(cycles[0].len(), 3);
395    }
396
397    #[test]
398    fn find_cycles_self_import_ignored() {
399        // A -> A (self-import, should NOT be reported)
400        let graph = build_cycle_graph(1, &[(0, 0)]);
401        let cycles = graph.find_cycles();
402        assert!(
403            cycles.is_empty(),
404            "self-imports should not be reported as cycles"
405        );
406    }
407
408    #[test]
409    fn find_cycles_multiple_independent_cycles() {
410        // Cycle 1: A -> B -> A
411        // Cycle 2: C -> D -> C
412        // No connection between cycles
413        let graph = build_cycle_graph(4, &[(0, 1), (1, 0), (2, 3), (3, 2)]);
414        let cycles = graph.find_cycles();
415        assert_eq!(cycles.len(), 2);
416        // Both cycles should have length 2
417        assert!(cycles.iter().all(|c| c.len() == 2));
418    }
419
420    #[test]
421    fn find_cycles_linear_chain_with_back_edge() {
422        // A -> B -> C -> D -> B (cycle is B-C-D)
423        let graph = build_cycle_graph(4, &[(0, 1), (1, 2), (2, 3), (3, 1)]);
424        let cycles = graph.find_cycles();
425        assert_eq!(cycles.len(), 1);
426        assert_eq!(cycles[0].len(), 3);
427        // The cycle should contain files 1, 2, 3
428        let ids: Vec<u32> = cycles[0].iter().map(|f| f.0).collect();
429        assert!(ids.contains(&1));
430        assert!(ids.contains(&2));
431        assert!(ids.contains(&3));
432        assert!(!ids.contains(&0));
433    }
434
435    #[test]
436    fn find_cycles_overlapping_cycles_enumerated() {
437        // A -> B -> A, B -> C -> B => SCC is {A, B, C} but should report 2 elementary cycles
438        let graph = build_cycle_graph(3, &[(0, 1), (1, 0), (1, 2), (2, 1)]);
439        let cycles = graph.find_cycles();
440        assert_eq!(
441            cycles.len(),
442            2,
443            "should find 2 elementary cycles, not 1 SCC"
444        );
445        assert!(
446            cycles.iter().all(|c| c.len() == 2),
447            "both cycles should have length 2"
448        );
449    }
450
451    #[test]
452    fn find_cycles_deterministic_ordering() {
453        // Run twice with the same graph — results should be identical
454        let graph1 = build_cycle_graph(3, &[(0, 1), (1, 2), (2, 0)]);
455        let graph2 = build_cycle_graph(3, &[(0, 1), (1, 2), (2, 0)]);
456        let cycles1 = graph1.find_cycles();
457        let cycles2 = graph2.find_cycles();
458        assert_eq!(cycles1.len(), cycles2.len());
459        for (c1, c2) in cycles1.iter().zip(cycles2.iter()) {
460            let paths1: Vec<&PathBuf> = c1
461                .iter()
462                .map(|f| &graph1.modules[f.0 as usize].path)
463                .collect();
464            let paths2: Vec<&PathBuf> = c2
465                .iter()
466                .map(|f| &graph2.modules[f.0 as usize].path)
467                .collect();
468            assert_eq!(paths1, paths2);
469        }
470    }
471
472    #[test]
473    fn find_cycles_sorted_by_length() {
474        // Two cycles: A-B (len 2) and C-D-E (len 3)
475        let graph = build_cycle_graph(5, &[(0, 1), (1, 0), (2, 3), (3, 4), (4, 2)]);
476        let cycles = graph.find_cycles();
477        assert_eq!(cycles.len(), 2);
478        assert!(
479            cycles[0].len() <= cycles[1].len(),
480            "cycles should be sorted by length"
481        );
482    }
483
484    #[test]
485    fn find_cycles_large_cycle() {
486        // Chain of 10 nodes forming a single cycle: 0->1->2->...->9->0
487        let edges: Vec<(u32, u32)> = (0..10).map(|i| (i, (i + 1) % 10)).collect();
488        let graph = build_cycle_graph(10, &edges);
489        let cycles = graph.find_cycles();
490        assert_eq!(cycles.len(), 1);
491        assert_eq!(cycles[0].len(), 10);
492    }
493
494    #[test]
495    fn find_cycles_complex_scc_multiple_elementary() {
496        // A square: A->B, B->C, C->D, D->A, plus diagonal A->C
497        // Elementary cycles: A->B->C->D->A, A->C->D->A, and A->B->C->...
498        let graph = build_cycle_graph(4, &[(0, 1), (1, 2), (2, 3), (3, 0), (0, 2)]);
499        let cycles = graph.find_cycles();
500        // Should find multiple elementary cycles, not just one SCC of 4
501        assert!(
502            cycles.len() >= 2,
503            "should find at least 2 elementary cycles, got {}",
504            cycles.len()
505        );
506        // All cycles should be shorter than the full SCC
507        assert!(cycles.iter().all(|c| c.len() <= 4));
508    }
509
510    #[test]
511    fn find_cycles_no_duplicate_cycles() {
512        // Triangle: A->B->C->A — should find exactly 1 cycle, not duplicates
513        // from different DFS start points
514        let graph = build_cycle_graph(3, &[(0, 1), (1, 2), (2, 0)]);
515        let cycles = graph.find_cycles();
516        assert_eq!(cycles.len(), 1, "triangle should produce exactly 1 cycle");
517        assert_eq!(cycles[0].len(), 3);
518    }
519}