Skip to main content

synta_codegen/
import_graph.rs

1//! Import dependency graph and circular-import detection.
2//!
3//! Given a set of parsed [`Module`]s, this module builds a directed dependency
4//! graph where each edge `A → B` means "module A imports symbols from module B".
5//! It then runs a depth-first search to detect back-edges (cycles) and returns
6//! each cycle as an [`ImportCycle`].
7//!
8//! Only edges between modules that are present in the provided slice are
9//! considered; imports from modules that were not provided are ignored.
10
11use crate::ast::Module;
12use std::collections::{HashMap, HashSet};
13
14/// A circular import chain detected among a set of modules.
15///
16/// [`modules`][ImportCycle::modules] contains the module names involved in the
17/// cycle.  The first and last elements are the same module to make the cycle
18/// easy to read, e.g. `["A", "B", "C", "A"]` means A imports from B, B from
19/// C, and C back from A.
20#[derive(Debug, Clone, PartialEq, Eq)]
21pub struct ImportCycle {
22    /// Module names forming the cycle (first == last to show closure).
23    pub modules: Vec<String>,
24}
25
26impl ImportCycle {
27    /// Return a human-readable representation of the cycle, e.g.
28    /// `"A → B → C → A"`.
29    pub fn display(&self) -> String {
30        self.modules.join(" → ")
31    }
32}
33
34/// Detect circular imports among a slice of parsed modules.
35///
36/// Returns one [`ImportCycle`] per distinct cycle found.  The same cycle is
37/// never reported more than once regardless of which node the DFS enters it
38/// from.  An empty `Vec` means no cycles were detected.
39///
40/// # Example
41///
42/// ```
43/// use synta_codegen::{parse, import_graph::detect_cycles};
44///
45/// let a = parse("ModA DEFINITIONS ::= BEGIN IMPORTS Foo FROM ModB; END").unwrap();
46/// let b = parse("ModB DEFINITIONS ::= BEGIN IMPORTS Bar FROM ModA; END").unwrap();
47///
48/// let cycles = detect_cycles(&[a, b]);
49/// assert_eq!(cycles.len(), 1);
50/// ```
51pub fn detect_cycles(modules: &[Module]) -> Vec<ImportCycle> {
52    // Set of module names present in the input slice.
53    let known: HashSet<&str> = modules.iter().map(|m| m.name.as_str()).collect();
54
55    // Adjacency list: module name → list of module names it imports from
56    // (restricted to modules present in the input slice).
57    let mut adj: HashMap<&str, Vec<&str>> = HashMap::new();
58    for m in modules {
59        let deps: Vec<&str> = m
60            .imports
61            .iter()
62            .filter_map(|imp| {
63                let n = imp.module_name.as_str();
64                if known.contains(n) {
65                    Some(n)
66                } else {
67                    None
68                }
69            })
70            .collect();
71        adj.insert(m.name.as_str(), deps);
72    }
73
74    let mut globally_visited: HashSet<&str> = HashSet::new();
75    let mut path: Vec<&str> = Vec::new();
76    let mut path_set: HashSet<&str> = HashSet::new();
77    let mut cycles: Vec<ImportCycle> = Vec::new();
78    // Canonical forms of already-reported cycles to avoid duplicates.
79    let mut seen: HashSet<Vec<String>> = HashSet::new();
80
81    for m in modules {
82        let name = m.name.as_str();
83        if !globally_visited.contains(name) {
84            dfs(
85                name,
86                &adj,
87                &mut globally_visited,
88                &mut path,
89                &mut path_set,
90                &mut cycles,
91                &mut seen,
92            );
93        }
94    }
95
96    cycles
97}
98
99/// Compute a topological generation order for a set of modules.
100///
101/// Returns the indices into `modules` in the order they should be generated:
102/// a module that is imported by another always appears **before** the module
103/// that imports it (leaf/dependency modules first).
104///
105/// If there are no inter-module edges among the provided modules the original
106/// slice order is preserved.  Callers should run [`detect_cycles`] first;
107/// any cycle causes the involved modules to be appended in input order after
108/// all acyclic modules.
109///
110/// # Example
111///
112/// ```
113/// use synta_codegen::{parse, import_graph::topological_order};
114///
115/// // B is a dependency of A.
116/// let a = parse("ModA DEFINITIONS ::= BEGIN IMPORTS X FROM ModB; END").unwrap();
117/// let b = parse("ModB DEFINITIONS ::= BEGIN END").unwrap();
118///
119/// let order = topological_order(&[a, b]);
120/// // ModB (index 1) must be generated before ModA (index 0).
121/// assert_eq!(order, vec![1, 0]);
122/// ```
123pub fn topological_order(modules: &[Module]) -> Vec<usize> {
124    // Map module name → index in the slice.
125    let name_to_idx: HashMap<&str, usize> = modules
126        .iter()
127        .enumerate()
128        .map(|(i, m)| (m.name.as_str(), i))
129        .collect();
130
131    // in_degree[i] = number of known modules that module i depends on.
132    let mut in_degree = vec![0usize; modules.len()];
133    // rev_adj[j] = modules that depend on j (they will be unblocked when j is done).
134    let mut rev_adj: Vec<Vec<usize>> = vec![Vec::new(); modules.len()];
135
136    for (i, m) in modules.iter().enumerate() {
137        for imp in &m.imports {
138            if let Some(&j) = name_to_idx.get(imp.module_name.as_str()) {
139                // Module i depends on module j.
140                in_degree[i] += 1;
141                rev_adj[j].push(i);
142            }
143        }
144    }
145
146    // Kahn's BFS: start with all nodes that have no unresolved dependencies.
147    let mut queue: std::collections::VecDeque<usize> = in_degree
148        .iter()
149        .enumerate()
150        .filter(|(_, &d)| d == 0)
151        .map(|(i, _)| i)
152        .collect();
153
154    let mut order = Vec::with_capacity(modules.len());
155    while let Some(idx) = queue.pop_front() {
156        order.push(idx);
157        for &dependent in &rev_adj[idx] {
158            in_degree[dependent] -= 1;
159            if in_degree[dependent] == 0 {
160                queue.push_back(dependent);
161            }
162        }
163    }
164
165    // Append modules involved in cycles (only possible if detect_cycles was not called
166    // before this function; callers that do call detect_cycles first will never reach
167    // this branch because cyclic modules never reach in_degree == 0 in Kahn's BFS).
168    if order.len() < modules.len() {
169        let emitted: HashSet<usize> = order.iter().copied().collect();
170        for i in 0..modules.len() {
171            if !emitted.contains(&i) {
172                order.push(i);
173            }
174        }
175    }
176
177    order
178}
179
180/// Recursive DFS helper.
181///
182/// * `globally_visited` — nodes whose entire subtree has been explored (BLACK).
183/// * `path` / `path_set` — current DFS path from the root (GRAY).
184fn dfs<'a>(
185    node: &'a str,
186    adj: &HashMap<&'a str, Vec<&'a str>>,
187    globally_visited: &mut HashSet<&'a str>,
188    path: &mut Vec<&'a str>,
189    path_set: &mut HashSet<&'a str>,
190    cycles: &mut Vec<ImportCycle>,
191    seen: &mut HashSet<Vec<String>>,
192) {
193    if path_set.contains(node) {
194        // Back-edge: node is already on the current path → cycle found.
195        let start = path.iter().position(|&n| n == node).unwrap();
196        let raw: Vec<&str> = path[start..].to_vec();
197
198        // Normalise: rotate so the lexicographically smallest name is first,
199        // then append that name again to close the cycle.
200        let min_pos = raw
201            .iter()
202            .enumerate()
203            .min_by_key(|(_, &s)| s)
204            .map(|(i, _)| i)
205            .unwrap_or(0);
206        let mut canonical: Vec<String> = raw[min_pos..]
207            .iter()
208            .chain(raw[..min_pos].iter())
209            .map(|&s| s.to_string())
210            .collect();
211        canonical.push(canonical[0].clone()); // close the cycle
212
213        if seen.insert(canonical.clone()) {
214            cycles.push(ImportCycle { modules: canonical });
215        }
216        return;
217    }
218
219    if globally_visited.contains(node) {
220        // Already fully explored — no new cycles reachable from here.
221        return;
222    }
223
224    // Mark node as currently being explored (GRAY).
225    path.push(node);
226    path_set.insert(node);
227
228    if let Some(neighbors) = adj.get(node) {
229        for &neighbor in neighbors {
230            dfs(
231                neighbor,
232                adj,
233                globally_visited,
234                path,
235                path_set,
236                cycles,
237                seen,
238            );
239        }
240    }
241
242    // Mark node as fully explored (BLACK).
243    path.pop();
244    path_set.remove(node);
245    globally_visited.insert(node);
246}
247
248#[cfg(test)]
249mod tests {
250    use super::*;
251    use crate::parse;
252
253    fn make_module(name: &str, imports_from: &[&str]) -> Module {
254        let import_block = if imports_from.is_empty() {
255            String::new()
256        } else {
257            let clauses: Vec<String> = imports_from
258                .iter()
259                .map(|m| format!("Dummy FROM {}", m))
260                .collect();
261            format!("IMPORTS {};", clauses.join(", "))
262        };
263        let src = format!("{} DEFINITIONS ::= BEGIN {} END", name, import_block);
264        parse(&src).unwrap()
265    }
266
267    #[test]
268    fn test_no_cycles_single_module() {
269        let m = make_module("Alpha", &[]);
270        assert!(detect_cycles(&[m]).is_empty());
271    }
272
273    #[test]
274    fn test_no_cycles_linear_chain() {
275        // A → B → C (no cycle)
276        let a = make_module("Alpha", &["Beta"]);
277        let b = make_module("Beta", &["Gamma"]);
278        let c = make_module("Gamma", &[]);
279        assert!(detect_cycles(&[a, b, c]).is_empty());
280    }
281
282    #[test]
283    fn test_direct_cycle_two_modules() {
284        // A → B → A
285        let a = make_module("Alpha", &["Beta"]);
286        let b = make_module("Beta", &["Alpha"]);
287        let cycles = detect_cycles(&[a, b]);
288        assert_eq!(cycles.len(), 1);
289        // Normalised: Alpha → Beta → Alpha (Alpha < Beta lexicographically)
290        assert_eq!(cycles[0].modules, vec!["Alpha", "Beta", "Alpha"]);
291    }
292
293    #[test]
294    fn test_three_module_cycle() {
295        // A → B → C → A
296        let a = make_module("Alpha", &["Beta"]);
297        let b = make_module("Beta", &["Gamma"]);
298        let c = make_module("Gamma", &["Alpha"]);
299        let cycles = detect_cycles(&[a, b, c]);
300        assert_eq!(cycles.len(), 1);
301        assert_eq!(cycles[0].modules, vec!["Alpha", "Beta", "Gamma", "Alpha"]);
302    }
303
304    #[test]
305    fn test_unknown_import_ignored() {
306        // A imports from Unknown (not in the slice) — not a cycle.
307        let a = make_module("Alpha", &["Unknown"]);
308        assert!(detect_cycles(&[a]).is_empty());
309    }
310
311    #[test]
312    fn test_self_import_is_cycle() {
313        // A → A
314        let a = make_module("Alpha", &["Alpha"]);
315        let cycles = detect_cycles(&[a]);
316        assert_eq!(cycles.len(), 1);
317        assert_eq!(cycles[0].modules, vec!["Alpha", "Alpha"]);
318    }
319
320    #[test]
321    fn test_display_format() {
322        let cycle = ImportCycle {
323            modules: vec!["Alpha".to_string(), "Beta".to_string(), "Alpha".to_string()],
324        };
325        assert_eq!(cycle.display(), "Alpha → Beta → Alpha");
326    }
327
328    #[test]
329    fn test_cycle_reported_once() {
330        // Same two-node cycle entered from different starting points — must
331        // appear exactly once in the output.
332        let a = make_module("Alpha", &["Beta"]);
333        let b = make_module("Beta", &["Alpha"]);
334        // Provide them in both orderings.
335        let cycles1 = detect_cycles(&[a.clone(), b.clone()]);
336        let cycles2 = detect_cycles(&[b.clone(), a.clone()]);
337        assert_eq!(cycles1.len(), 1);
338        assert_eq!(cycles2.len(), 1);
339        assert_eq!(cycles1[0].modules, cycles2[0].modules);
340    }
341
342    // ── topological_order tests ───────────────────────────────────────────────
343
344    #[test]
345    fn test_topo_single_module() {
346        let m = make_module("Alpha", &[]);
347        assert_eq!(topological_order(&[m]), vec![0]);
348    }
349
350    #[test]
351    fn test_topo_linear_chain() {
352        // A imports from B, B imports from C → order should be C, B, A.
353        let a = make_module("Alpha", &["Beta"]);
354        let b = make_module("Beta", &["Gamma"]);
355        let c = make_module("Gamma", &[]);
356        let order = topological_order(&[a, b, c]);
357        // Gamma (2) before Beta (1) before Alpha (0).
358        assert_eq!(order, vec![2, 1, 0]);
359    }
360
361    #[test]
362    fn test_topo_multiple_independent_leaves() {
363        // Two independent leaf modules (Beta, Gamma) feed into Alpha;
364        // expressed as a chain: Alpha→Beta, Gamma is independent.
365        // Ordering must put Beta before Alpha; Gamma can be anywhere.
366        let a = make_module("Alpha", &["Beta"]);
367        let b = make_module("Beta", &[]);
368        let c = make_module("Gamma", &[]);
369        let order = topological_order(&[a, b, c]);
370        let alpha_pos = order.iter().position(|&i| i == 0).unwrap();
371        let beta_pos = order.iter().position(|&i| i == 1).unwrap();
372        assert!(beta_pos < alpha_pos, "Beta must be generated before Alpha");
373    }
374
375    #[test]
376    fn test_topo_no_cross_deps() {
377        // Two independent modules — original order is preserved.
378        let a = make_module("Alpha", &[]);
379        let b = make_module("Beta", &[]);
380        let order = topological_order(&[a, b]);
381        assert_eq!(order, vec![0, 1]);
382    }
383}