Skip to main content

normalize_graph/
lib.rs

1//! Pure graph algorithms for dependency analysis.
2//!
3//! Operates on abstract graphs represented as adjacency lists
4//! (`HashMap<String, HashSet<String>>`). No filesystem, CLI, or
5//! normalize-specific types.
6//!
7//! Algorithms: Tarjan SCC, bridge-finding, diamond detection,
8//! transitive edge detection, longest chains, weakly connected components.
9
10use normalize_output::OutputFormatter;
11use nu_ansi_term::Color;
12use serde::Serialize;
13use std::collections::{HashMap, HashSet, VecDeque};
14
15/// Minimum chain length (in nodes) to include in the longest-chains report.
16///
17/// A chain of 4 nodes has depth 3 (3 edges). Depth 2 chains are common in any
18/// project with a utilities layer and are not interesting signals. Starting at
19/// depth ≥ 3 surfaces only chains that likely indicate layering violations or
20/// overly deep call stacks worth reviewing.
21const MIN_CHAIN_NODE_COUNT: usize = 4;
22
23// ---------------------------------------------------------------------------
24// Types
25// ---------------------------------------------------------------------------
26
27/// What the graph nodes represent.
28#[derive(
29    Debug, Clone, Copy, PartialEq, Eq, Serialize, serde::Deserialize, schemars::JsonSchema,
30)]
31#[serde(rename_all = "lowercase")]
32pub enum GraphTarget {
33    /// Nodes are files, edges are imports
34    Modules,
35    /// Nodes are functions (file:symbol), edges are calls
36    Symbols,
37    /// Nodes are types, edges are type references (fields, params, inheritance, etc.)
38    Types,
39}
40
41impl std::str::FromStr for GraphTarget {
42    type Err = String;
43    fn from_str(s: &str) -> Result<Self, Self::Err> {
44        match s {
45            "modules" => Ok(Self::Modules),
46            "symbols" => Ok(Self::Symbols),
47            "types" => Ok(Self::Types),
48            _ => Err(format!(
49                "unknown graph target '{}', expected 'modules', 'symbols', or 'types'",
50                s
51            )),
52        }
53    }
54}
55
56impl std::fmt::Display for GraphTarget {
57    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
58        match self {
59            Self::Modules => write!(f, "modules"),
60            Self::Symbols => write!(f, "symbols"),
61            Self::Types => write!(f, "types"),
62        }
63    }
64}
65
66/// Overall graph statistics.
67#[derive(Debug, Serialize, schemars::JsonSchema)]
68pub struct GraphStats {
69    pub nodes: usize,
70    pub edges: usize,
71    /// Edge density: edges / (nodes × (nodes − 1)); 0.0 for graphs with fewer than 2 nodes.
72    pub density: f64,
73    pub weakly_connected_components: usize,
74    pub largest_component_size: usize,
75    pub scc_count: usize,
76    /// Number of strongly connected components with more than one node (actual circular-dependency clusters).
77    pub nontrivial_scc_count: usize,
78    pub diamond_count: usize,
79    /// Number of bridge edges whose removal would disconnect the graph.
80    pub bridge_count: usize,
81    /// Number of redundant transitive edges (A→C where A→B→C already exists).
82    pub transitive_edge_count: usize,
83    /// Depth (edge count) of the longest import chain.
84    pub max_chain_depth: usize,
85    /// Total number of import chains at or exceeding the depth threshold.
86    pub chain_count: usize,
87    /// Number of nodes with no inbound edges (unreachable or potentially dead code).
88    pub dead_node_count: usize,
89}
90
91/// A strongly connected component (circular-dependency cluster).
92#[derive(Debug, Serialize, schemars::JsonSchema)]
93pub struct Scc {
94    /// Modules that are part of this strongly connected component.
95    pub modules: Vec<String>,
96    /// Number of edges within the SCC
97    pub internal_edges: usize,
98}
99
100/// A diamond dependency: source imports left and right, both import target.
101#[derive(Debug, Serialize, schemars::JsonSchema)]
102pub struct Diamond {
103    /// The module that starts the diamond (imports both `left` and `right`).
104    pub source: String,
105    /// The left intermediate module (imports `target`).
106    pub left: String,
107    /// The right intermediate module (imports `target`).
108    pub right: String,
109    /// The shared dependency that both intermediate modules import.
110    pub target: String,
111}
112
113/// A bridge edge whose removal disconnects the graph.
114#[derive(Debug, Serialize, schemars::JsonSchema)]
115pub struct BridgeEdge {
116    /// The importing module.
117    pub from: String,
118    /// The imported module.
119    pub to: String,
120}
121
122/// A deep import chain (longest dependency path).
123#[derive(Debug, Serialize, schemars::JsonSchema)]
124pub struct ImportChain {
125    /// Modules in the chain from start to end, ordered by import depth.
126    pub modules: Vec<String>,
127    /// Length of the chain (number of edges, not nodes)
128    pub depth: usize,
129}
130
131/// A transitive (redundant) import: A→C is redundant because A→B→C.
132#[derive(Debug, Serialize, schemars::JsonSchema)]
133pub struct TransitiveEdge {
134    /// The importing module.
135    pub from: String,
136    /// The transitively reachable module (redundant direct dependency).
137    pub to: String,
138    /// The intermediate module that already provides the transitive path.
139    pub via: String,
140}
141
142/// A file that depends on the query target (modules graph only).
143#[derive(Debug, Serialize, schemars::JsonSchema)]
144pub struct DependentEntry {
145    pub file: String,
146    pub depth: usize,
147    pub has_tests: bool,
148    pub fan_in: usize,
149}
150
151/// Blast radius summary statistics (modules graph only).
152#[derive(Debug, Serialize, schemars::JsonSchema)]
153pub struct BlastRadius {
154    pub direct_count: usize,
155    pub transitive_count: usize,
156    pub untested_count: usize,
157    pub max_depth: usize,
158}
159
160/// Report for reverse dependency queries: what depends on a given file/symbol.
161///
162/// For `--on modules` (default): structured output with depth, test coverage,
163/// fan-in, and blast radius statistics.
164/// For `--on symbols` / `--on types`: flat alphabetical list of dependents.
165#[derive(Debug, Serialize, schemars::JsonSchema)]
166pub struct DependentsReport {
167    /// The file or symbol being queried.
168    pub target: String,
169    /// Graph node kind used for the query.
170    pub graph_target: GraphTarget,
171    /// Direct dependents (depth = 1) — populated for modules graph.
172    #[serde(skip_serializing_if = "Vec::is_empty", default)]
173    pub direct: Vec<DependentEntry>,
174    /// Transitive dependents (depth > 1) — populated for modules graph.
175    #[serde(skip_serializing_if = "Vec::is_empty", default)]
176    pub transitive: Vec<DependentEntry>,
177    /// Blast radius summary — populated for modules graph.
178    #[serde(skip_serializing_if = "Option::is_none")]
179    pub blast_radius: Option<BlastRadius>,
180    /// Untested impact paths — populated for modules graph.
181    #[serde(skip_serializing_if = "Vec::is_empty", default)]
182    pub untested_paths: Vec<String>,
183    /// Flat dependent list — populated for symbols/types graph.
184    #[serde(skip_serializing_if = "Vec::is_empty", default)]
185    pub dependents: Vec<String>,
186}
187
188impl normalize_output::OutputFormatter for DependentsReport {
189    fn format_text(&self) -> String {
190        match self.graph_target {
191            GraphTarget::Modules => self.format_modules_text(false),
192            GraphTarget::Symbols | GraphTarget::Types => self.format_flat_text(false),
193        }
194    }
195
196    fn format_pretty(&self) -> String {
197        match self.graph_target {
198            GraphTarget::Modules => self.format_modules_text(true),
199            GraphTarget::Symbols | GraphTarget::Types => self.format_flat_text(true),
200        }
201    }
202}
203
204impl DependentsReport {
205    fn format_modules_text(&self, pretty: bool) -> String {
206        let mut lines = Vec::new();
207        let total = self.direct.len() + self.transitive.len();
208
209        if pretty {
210            lines.push(
211                Color::Cyan
212                    .bold()
213                    .paint(format!("# Dependents of {}", self.target))
214                    .to_string(),
215            );
216        } else {
217            lines.push(format!("# Dependents of {}", self.target));
218        }
219        lines.push(String::new());
220
221        if let Some(ref br) = self.blast_radius {
222            if pretty {
223                lines.push(format!(
224                    "{} files affected · {} direct · {} transitive · {} untested · max depth {}",
225                    Color::Default.bold().paint(total.to_string()),
226                    Color::Green.paint(br.direct_count.to_string()),
227                    Color::Yellow.paint(br.transitive_count.to_string()),
228                    Color::Red.paint(br.untested_count.to_string()),
229                    br.max_depth
230                ));
231            } else {
232                lines.push(format!(
233                    "{} files affected · {} direct · {} transitive · {} untested · max depth {}",
234                    total, br.direct_count, br.transitive_count, br.untested_count, br.max_depth
235                ));
236            }
237        }
238
239        if !self.direct.is_empty() {
240            lines.push(String::new());
241            if pretty {
242                lines.push(
243                    Color::Green
244                        .bold()
245                        .paint(format!("## Direct ({})", self.direct.len()))
246                        .to_string(),
247                );
248            } else {
249                lines.push(format!("## Direct ({})", self.direct.len()));
250            }
251            for e in &self.direct {
252                let tested = if e.has_tests {
253                    if pretty {
254                        Color::Green.paint("tested").to_string()
255                    } else {
256                        "tested".to_string()
257                    }
258                } else if pretty {
259                    Color::Red.bold().paint("UNTESTED").to_string()
260                } else {
261                    "UNTESTED".to_string()
262                };
263                lines.push(format!(
264                    "  {:<40} depth {}  {}  fan-in {}",
265                    e.file, e.depth, tested, e.fan_in
266                ));
267            }
268        }
269
270        if !self.transitive.is_empty() {
271            lines.push(String::new());
272            if pretty {
273                lines.push(
274                    Color::Yellow
275                        .bold()
276                        .paint(format!("## Transitive ({})", self.transitive.len()))
277                        .to_string(),
278                );
279            } else {
280                lines.push(format!("## Transitive ({})", self.transitive.len()));
281            }
282            for e in &self.transitive {
283                let tested = if e.has_tests {
284                    if pretty {
285                        Color::Green.paint("tested").to_string()
286                    } else {
287                        "tested".to_string()
288                    }
289                } else if pretty {
290                    Color::Red.bold().paint("UNTESTED").to_string()
291                } else {
292                    "UNTESTED".to_string()
293                };
294                lines.push(format!(
295                    "  {:<40} depth {}  {}  fan-in {}",
296                    e.file, e.depth, tested, e.fan_in
297                ));
298            }
299        }
300
301        if !self.untested_paths.is_empty() {
302            lines.push(String::new());
303            if pretty {
304                lines.push(
305                    Color::Red
306                        .bold()
307                        .paint(format!(
308                            "## Untested Impact Paths ({})",
309                            self.untested_paths.len()
310                        ))
311                        .to_string(),
312                );
313            } else {
314                lines.push(format!(
315                    "## Untested Impact Paths ({})",
316                    self.untested_paths.len()
317                ));
318            }
319            for p in &self.untested_paths {
320                lines.push(format!("  {}", p));
321            }
322        }
323
324        lines.join("\n")
325    }
326
327    fn format_flat_text(&self, pretty: bool) -> String {
328        let kind = match self.graph_target {
329            GraphTarget::Modules => "modules",
330            GraphTarget::Symbols => "symbols",
331            GraphTarget::Types => "types",
332        };
333        let mut out = Vec::new();
334        if pretty {
335            out.push(format!(
336                "{} {} {}",
337                Color::Cyan.bold().paint("# Dependents of"),
338                Color::Default.bold().paint(&self.target),
339                Color::Default.dimmed().paint(format!(
340                    "({} {} depend on it)",
341                    self.dependents.len(),
342                    kind
343                )),
344            ));
345            for dep in &self.dependents {
346                out.push(format!("  {}", Color::White.paint(dep.as_str())));
347            }
348        } else {
349            out.push(format!(
350                "# Dependents of {} ({} {} depend on it)",
351                self.target,
352                self.dependents.len(),
353                kind
354            ));
355            for dep in &self.dependents {
356                out.push(format!("  {}", dep));
357            }
358        }
359        out.join("\n")
360    }
361}
362
363/// Full graph analysis report.
364#[derive(Debug, Serialize, schemars::JsonSchema)]
365pub struct GraphReport {
366    pub target: GraphTarget,
367    pub stats: GraphStats,
368    pub sccs: Vec<Scc>,
369    pub diamonds: Vec<Diamond>,
370    pub bridges: Vec<BridgeEdge>,
371    pub longest_chains: Vec<ImportChain>,
372    pub transitive_edges: Vec<TransitiveEdge>,
373    /// Nodes with no inbound edges (files/symbols that nothing imports/calls).
374    /// Sorted alphabetically. Does not include fully isolated nodes (no edges at all).
375    pub dead_nodes: Vec<String>,
376}
377
378// ---------------------------------------------------------------------------
379// Algorithms
380// ---------------------------------------------------------------------------
381
382/// Collect all nodes from the import graph.
383pub fn all_nodes(imports: &HashMap<String, HashSet<String>>) -> HashSet<String> {
384    let mut nodes = HashSet::new();
385    for (k, vs) in imports {
386        nodes.insert(k.clone());
387        for v in vs {
388            nodes.insert(v.clone());
389        }
390    }
391    nodes
392}
393
394/// Count directed edges.
395pub fn edge_count(imports: &HashMap<String, HashSet<String>>) -> usize {
396    imports.values().map(|s| s.len()).sum()
397}
398
399/// Build the reverse (transposed) graph: edges point from target to source.
400pub fn reverse_graph(
401    imports: &HashMap<String, HashSet<String>>,
402) -> HashMap<String, HashSet<String>> {
403    let mut rev: HashMap<String, HashSet<String>> = HashMap::new();
404    for (src, targets) in imports {
405        for tgt in targets {
406            rev.entry(tgt.clone()).or_default().insert(src.clone());
407        }
408    }
409    rev
410}
411
412/// Find all nodes that (transitively) depend on `target` via BFS on the reverse graph.
413/// Returns a sorted list excluding the target itself.
414pub fn find_dependents(imports: &HashMap<String, HashSet<String>>, target: &str) -> Vec<String> {
415    let rev = reverse_graph(imports);
416    let mut visited: HashSet<String> = HashSet::new();
417    let mut queue: VecDeque<String> = VecDeque::new();
418    queue.push_back(target.to_string());
419    visited.insert(target.to_string());
420
421    while let Some(node) = queue.pop_front() {
422        if let Some(parents) = rev.get(&node) {
423            for parent in parents {
424                if visited.insert(parent.clone()) {
425                    queue.push_back(parent.clone());
426                }
427            }
428        }
429    }
430
431    let mut result: Vec<String> = visited.into_iter().filter(|n| n != target).collect();
432    result.sort();
433    result
434}
435
436/// Find nodes with no inbound edges (nothing imports/calls them) that do have
437/// outbound edges (they import/call something). These are unreachable internal
438/// nodes — potential dead code. Entry points (main.rs, lib.rs) are included;
439/// callers can filter based on their heuristics.
440pub fn find_dead_nodes(imports: &HashMap<String, HashSet<String>>) -> Vec<String> {
441    // Collect all nodes that appear as targets (have at least one inbound edge)
442    let mut has_inbound: HashSet<&str> = HashSet::new();
443    for targets in imports.values() {
444        for t in targets {
445            has_inbound.insert(t.as_str());
446        }
447    }
448
449    // Nodes that have outbound edges but no inbound edges
450    let mut dead: Vec<String> = imports
451        .keys()
452        .filter(|n| !has_inbound.contains(n.as_str()))
453        .cloned()
454        .collect();
455    dead.sort();
456    dead
457}
458
459/// Weakly connected components via BFS on the undirected view.
460pub fn weakly_connected_components(imports: &HashMap<String, HashSet<String>>) -> Vec<Vec<String>> {
461    // Build undirected adjacency
462    let mut adj: HashMap<String, HashSet<String>> = HashMap::new();
463    for (u, vs) in imports {
464        for v in vs {
465            adj.entry(u.clone()).or_default().insert(v.clone());
466            adj.entry(v.clone()).or_default().insert(u.clone());
467        }
468    }
469
470    let mut visited: HashSet<String> = HashSet::new();
471    let mut components = Vec::new();
472    let nodes = all_nodes(imports);
473
474    for node in &nodes {
475        if visited.contains(node) {
476            continue;
477        }
478        let mut component = Vec::new();
479        let mut queue = VecDeque::new();
480        queue.push_back(node.clone());
481        visited.insert(node.clone());
482
483        while let Some(cur) = queue.pop_front() {
484            component.push(cur.clone());
485            if let Some(neighbors) = adj.get(&cur) {
486                for n in neighbors {
487                    if visited.insert(n.clone()) {
488                        queue.push_back(n.clone());
489                    }
490                }
491            }
492        }
493        component.sort();
494        components.push(component);
495    }
496
497    components.sort_by_key(|c| std::cmp::Reverse(c.len()));
498    components
499}
500
501/// Iterative Tarjan's SCC algorithm.
502pub fn tarjan_sccs(imports: &HashMap<String, HashSet<String>>) -> Vec<Vec<String>> {
503    let nodes = all_nodes(imports);
504    let mut index_counter = 0usize;
505    let mut indices: HashMap<String, usize> = HashMap::new();
506    let mut lowlink: HashMap<String, usize> = HashMap::new();
507    let mut on_stack: HashSet<String> = HashSet::new();
508    let mut stack: Vec<String> = Vec::new();
509    let mut sccs: Vec<Vec<String>> = Vec::new();
510
511    // Iterative DFS using an explicit call stack
512    #[derive(Debug)]
513    enum Frame {
514        Enter(String),
515        Resume(String, String), // (node, neighbor) — resume after returning from neighbor
516    }
517
518    for start in &nodes {
519        if indices.contains_key(start) {
520            continue;
521        }
522
523        let mut call_stack: Vec<Frame> = vec![Frame::Enter(start.clone())];
524
525        while let Some(frame) = call_stack.pop() {
526            match frame {
527                Frame::Enter(node) => {
528                    if indices.contains_key(&node) {
529                        continue;
530                    }
531                    indices.insert(node.clone(), index_counter);
532                    lowlink.insert(node.clone(), index_counter);
533                    index_counter += 1;
534                    stack.push(node.clone());
535                    on_stack.insert(node.clone());
536
537                    let neighbors: Vec<String> = imports
538                        .get(&node)
539                        .map(|s| s.iter().cloned().collect())
540                        .unwrap_or_default();
541
542                    // Push neighbors in reverse so we process them in order
543                    for neighbor in neighbors.into_iter().rev() {
544                        if !indices.contains_key(&neighbor) {
545                            call_stack.push(Frame::Resume(node.clone(), neighbor.clone()));
546                            call_stack.push(Frame::Enter(neighbor));
547                        } else if on_stack.contains(&neighbor) {
548                            // normalize-syntax-allow: rust/unwrap-in-impl - node inserted into indices/lowlink at Frame::Enter
549                            let nl = *lowlink.get(&node).unwrap(); // normalize-syntax-allow: rust/unwrap-in-impl - node inserted at Frame::Enter
550                            let ni = *indices.get(&neighbor).unwrap(); // normalize-syntax-allow: rust/unwrap-in-impl - neighbor was already visited (in indices)
551                            lowlink.insert(node.clone(), nl.min(ni));
552                        }
553                    }
554
555                    // After processing all neighbors, check if this is a root
556                    // We need a sentinel to know when we're done with a node
557                    call_stack.push(Frame::Resume(node.clone(), String::new()));
558                }
559                Frame::Resume(node, neighbor) => {
560                    if neighbor.is_empty() {
561                        // Sentinel: all neighbors processed, check if root of SCC
562                        // normalize-syntax-allow: rust/unwrap-in-impl - node inserted into indices/lowlink at Frame::Enter
563                        let nl = *lowlink.get(&node).unwrap();
564                        let ni = *indices.get(&node).unwrap(); // normalize-syntax-allow: rust/unwrap-in-impl - node inserted at Frame::Enter
565                        if nl == ni {
566                            let mut scc = Vec::new();
567                            while let Some(w) = stack.pop() {
568                                on_stack.remove(&w);
569                                scc.push(w.clone());
570                                if w == node {
571                                    break;
572                                }
573                            }
574                            scc.sort();
575                            sccs.push(scc);
576                        }
577                    } else {
578                        // Resume after DFS into neighbor
579                        // normalize-syntax-allow: rust/unwrap-in-impl - node inserted into lowlink at Frame::Enter
580                        let nl = *lowlink.get(&node).unwrap();
581                        let neighbor_ll = *lowlink.get(&neighbor).unwrap_or(&usize::MAX);
582                        lowlink.insert(node.clone(), nl.min(neighbor_ll));
583                    }
584                }
585            }
586        }
587    }
588
589    sccs
590}
591
592/// Find nontrivial SCCs (size > 1) with internal edge counts.
593pub fn find_sccs(imports: &HashMap<String, HashSet<String>>) -> Vec<Scc> {
594    let raw = tarjan_sccs(imports);
595    let mut result = Vec::new();
596    for modules in raw {
597        if modules.len() <= 1 {
598            continue;
599        }
600        let member_set: HashSet<&str> = modules.iter().map(|s| s.as_str()).collect();
601        let mut internal_edges = 0;
602        for m in &modules {
603            if let Some(targets) = imports.get(m) {
604                for t in targets {
605                    if member_set.contains(t.as_str()) {
606                        internal_edges += 1;
607                    }
608                }
609            }
610        }
611        result.push(Scc {
612            modules,
613            internal_edges,
614        });
615    }
616    result.sort_by(|a, b| b.modules.len().cmp(&a.modules.len()));
617    result
618}
619
620/// Diamond detection: for each node A with imports {B₁,B₂,...}, check pairs for shared targets.
621pub fn find_diamonds(imports: &HashMap<String, HashSet<String>>, limit: usize) -> Vec<Diamond> {
622    let mut diamonds = Vec::new();
623    let mut seen: HashSet<(String, String, String, String)> = HashSet::new();
624
625    let mut sources: Vec<&String> = imports.keys().collect();
626    sources.sort();
627
628    for source in sources {
629        let deps: Vec<&String> = match imports.get(source) {
630            Some(s) => {
631                let mut v: Vec<&String> = s.iter().collect();
632                v.sort();
633                v
634            }
635            None => continue,
636        };
637        if deps.len() < 2 {
638            continue;
639        }
640
641        for i in 0..deps.len() {
642            let left_targets = match imports.get(deps[i]) {
643                Some(s) => s,
644                None => continue,
645            };
646            for j in (i + 1)..deps.len() {
647                let right_targets = match imports.get(deps[j]) {
648                    Some(s) => s,
649                    None => continue,
650                };
651                // Find shared targets
652                for target in left_targets.intersection(right_targets) {
653                    let key = (
654                        source.clone(),
655                        deps[i].clone(),
656                        deps[j].clone(),
657                        target.clone(),
658                    );
659                    if seen.insert(key) {
660                        diamonds.push(Diamond {
661                            source: source.clone(),
662                            left: deps[i].clone(),
663                            right: deps[j].clone(),
664                            target: target.clone(),
665                        });
666                        if diamonds.len() >= limit {
667                            return diamonds;
668                        }
669                    }
670                }
671            }
672        }
673    }
674    diamonds
675}
676
677/// Bridge finding via Tarjan's bridge algorithm on the undirected view.
678/// Only report bridges where exactly one directed edge exists (bidirectional ≠ bridge).
679pub fn find_bridges(imports: &HashMap<String, HashSet<String>>) -> Vec<BridgeEdge> {
680    // Build undirected adjacency list with neighbor indices for efficiency
681    let nodes = all_nodes(imports);
682    let node_list: Vec<String> = {
683        let mut v: Vec<String> = nodes.into_iter().collect();
684        v.sort();
685        v
686    };
687    let node_idx: HashMap<&str, usize> = node_list
688        .iter()
689        .enumerate()
690        .map(|(i, s)| (s.as_str(), i))
691        .collect();
692    let n = node_list.len();
693
694    let mut adj: Vec<Vec<usize>> = vec![Vec::new(); n];
695    let mut directed_edges: HashSet<(usize, usize)> = HashSet::new();
696
697    for (u, vs) in imports {
698        // normalize-syntax-allow: rust/unwrap-in-impl - node_idx built from the same node_list as imports keys
699        let ui = *node_idx.get(u.as_str()).unwrap();
700        for v in vs {
701            // normalize-syntax-allow: rust/unwrap-in-impl - node_idx built from the same node_list as imports values
702            let vi = *node_idx.get(v.as_str()).unwrap();
703            directed_edges.insert((ui, vi));
704            if !adj[ui].contains(&vi) {
705                adj[ui].push(vi);
706            }
707            if !adj[vi].contains(&ui) {
708                adj[vi].push(ui);
709            }
710        }
711    }
712
713    // Iterative bridge-finding (Tarjan's)
714    let mut disc = vec![0usize; n];
715    let mut low = vec![0usize; n];
716    let mut visited = vec![false; n];
717    let mut timer = 1usize;
718    let mut bridges_idx: Vec<(usize, usize)> = Vec::new();
719
720    #[derive(Debug)]
721    struct BridgeFrame {
722        node: usize,
723        parent: usize, // usize::MAX for none
724        adj_idx: usize,
725    }
726
727    for start in 0..n {
728        if visited[start] {
729            continue;
730        }
731
732        let mut stack = vec![BridgeFrame {
733            node: start,
734            parent: usize::MAX,
735            adj_idx: 0,
736        }];
737        visited[start] = true;
738        disc[start] = timer;
739        low[start] = timer;
740        timer += 1;
741
742        while let Some(frame) = stack.last_mut() {
743            let u = frame.node;
744            if frame.adj_idx < adj[u].len() {
745                let v = adj[u][frame.adj_idx];
746                frame.adj_idx += 1;
747
748                if !visited[v] {
749                    visited[v] = true;
750                    disc[v] = timer;
751                    low[v] = timer;
752                    timer += 1;
753                    stack.push(BridgeFrame {
754                        node: v,
755                        parent: u,
756                        adj_idx: 0,
757                    });
758                } else if v != frame.parent {
759                    low[u] = low[u].min(disc[v]);
760                }
761            } else {
762                // Done with this node — pop and update parent
763                let u = frame.node;
764                let parent = frame.parent;
765                stack.pop();
766
767                if parent != usize::MAX {
768                    low[parent] = low[parent].min(low[u]);
769                    if low[u] > disc[parent] {
770                        bridges_idx.push((parent, u));
771                    }
772                }
773            }
774        }
775    }
776
777    // Map back to directed edges, only report if unidirectional
778    let mut result = Vec::new();
779    for (a, b) in bridges_idx {
780        let has_ab = directed_edges.contains(&(a, b));
781        let has_ba = directed_edges.contains(&(b, a));
782        // Bidirectional edges aren't true dependency bridges
783        if has_ab && !has_ba {
784            result.push(BridgeEdge {
785                from: node_list[a].clone(),
786                to: node_list[b].clone(),
787            });
788        } else if has_ba && !has_ab {
789            result.push(BridgeEdge {
790                from: node_list[b].clone(),
791                to: node_list[a].clone(),
792            });
793        }
794    }
795    result.sort_by(|a, b| a.from.cmp(&b.from).then(a.to.cmp(&b.to)));
796    result
797}
798
799/// Transitive edge detection: A→C is redundant if ∃B: A→B and B→C.
800pub fn find_transitive_edges(
801    imports: &HashMap<String, HashSet<String>>,
802    limit: usize,
803) -> Vec<TransitiveEdge> {
804    let mut result = Vec::new();
805    let mut sources: Vec<&String> = imports.keys().collect();
806    sources.sort();
807
808    'outer: for a in &sources {
809        let a_targets = match imports.get(*a) {
810            Some(s) => s,
811            None => continue,
812        };
813        for c in a_targets {
814            // Check if any other direct import B of A has C as a target
815            for b in a_targets {
816                if b == c {
817                    continue;
818                }
819                if let Some(b_targets) = imports.get(b)
820                    && b_targets.contains(c)
821                {
822                    result.push(TransitiveEdge {
823                        from: (*a).clone(),
824                        to: c.clone(),
825                        via: b.clone(),
826                    });
827                    if result.len() >= limit {
828                        break 'outer;
829                    }
830                    break; // One witness suffices per (A, C)
831                }
832            }
833        }
834    }
835    result
836}
837
838/// Count all transitive edges (without limit, for stats).
839pub fn count_transitive_edges(imports: &HashMap<String, HashSet<String>>) -> usize {
840    let mut count = 0;
841    for a_targets in imports.values() {
842        for c in a_targets {
843            for b in a_targets {
844                if b == c {
845                    continue;
846                }
847                if let Some(b_targets) = imports.get(b)
848                    && b_targets.contains(c)
849                {
850                    count += 1;
851                    break; // One witness per (A, C)
852                }
853            }
854        }
855    }
856    count
857}
858
859/// Find the longest import chains (dependency paths) in the graph.
860///
861/// Returns up to `limit` chains sorted by depth (longest first). When `limit`
862/// is `0` all qualifying chains are returned. Only chains whose node count meets
863/// [`MIN_CHAIN_NODE_COUNT`] are included.
864///
865/// Chains dominated by a suffix of a longer chain are removed: if chain B's
866/// modules are a suffix of chain A's modules, B is dropped. This keeps the
867/// result set non-redundant — each returned chain represents a unique root.
868///
869/// Uses DFS from each node with memoization to find the longest path, avoiding cycles.
870pub fn find_longest_chains(
871    graph: &HashMap<String, HashSet<String>>,
872    limit: usize,
873) -> Vec<ImportChain> {
874    let mut longest_paths: Vec<ImportChain> = Vec::new();
875    let mut memo: HashMap<String, Vec<String>> = HashMap::new();
876
877    for start in graph.keys() {
878        let mut visited: HashSet<String> = HashSet::new();
879        let path = longest_path_from(start, graph, &mut visited, &mut memo);
880        if path.len() >= MIN_CHAIN_NODE_COUNT {
881            longest_paths.push(ImportChain {
882                depth: path.len() - 1,
883                modules: path,
884            });
885        }
886    }
887
888    longest_paths.sort_by(|a, b| b.depth.cmp(&a.depth));
889
890    // Deduplicate — if a shorter chain is a suffix of a longer one, skip it
891    let mut unique_chains: Vec<ImportChain> = Vec::new();
892    for chain in longest_paths {
893        let dominated = unique_chains.iter().any(|existing| {
894            existing.modules.len() > chain.modules.len()
895                && existing.modules.ends_with(&chain.modules)
896        });
897        if !dominated {
898            unique_chains.push(chain);
899        }
900        if unique_chains.len() >= limit {
901            break;
902        }
903    }
904
905    unique_chains
906}
907
908/// Find the longest path from a node using DFS with memoization.
909///
910/// # Memoization limitation
911///
912/// Results are cached keyed only by `node`. When the same node is reached from
913/// two different roots the first cached result is reused, even though the
914/// `visited` set differs between the two calls. This means the cached result
915/// may be shorter than what would be computed from a different root (because
916/// some successors were marked visited in the first traversal). The trade-off
917/// is acceptable: the memo avoids O(n²) worst-case work and the goal is
918/// finding representative longest paths, not an exhaustive enumeration.
919pub fn longest_path_from(
920    node: &str,
921    graph: &HashMap<String, HashSet<String>>,
922    visited: &mut HashSet<String>,
923    memo: &mut HashMap<String, Vec<String>>,
924) -> Vec<String> {
925    if let Some(cached) = memo.get(node) {
926        return cached.clone();
927    }
928
929    visited.insert(node.to_string());
930
931    let mut longest: Vec<String> = vec![node.to_string()];
932
933    if let Some(neighbors) = graph.get(node) {
934        for neighbor in neighbors {
935            if !visited.contains(neighbor) {
936                let sub_path = longest_path_from(neighbor, graph, visited, memo);
937                if sub_path.len() + 1 > longest.len() {
938                    let mut new_path = vec![node.to_string()];
939                    new_path.extend(sub_path);
940                    longest = new_path;
941                }
942            }
943        }
944    }
945
946    visited.remove(node);
947    memo.insert(node.to_string(), longest.clone());
948    longest
949}
950
951// ---------------------------------------------------------------------------
952// Top-level analysis function
953// ---------------------------------------------------------------------------
954
955/// Analyze graph-theoretic properties of an abstract dependency graph.
956///
957/// Takes an adjacency list (`HashMap<String, HashSet<String>>`) and a limit
958/// for the number of items to return in each section. Pass `0` or `usize::MAX`
959/// for no limit — `0` is treated as "unlimited" so callers do not accidentally
960/// truncate all results to empty Vecs while stats counts still reflect the
961/// full data (which would produce misleading reports). The `target` parameter
962/// is recorded in the report for display.
963pub fn analyze_graph_data(
964    imports: &HashMap<String, HashSet<String>>,
965    target: GraphTarget,
966    limit: usize,
967) -> GraphReport {
968    // Treat 0 as "no limit" — callers should pass usize::MAX explicitly when
969    // they want unlimited, but 0 is a common default and should not silently
970    // truncate every result Vec to empty.
971    let limit = if limit == 0 { usize::MAX } else { limit };
972    let nodes = all_nodes(imports);
973    let node_count = nodes.len();
974    let edges = edge_count(imports);
975    let density = if node_count > 1 {
976        edges as f64 / (node_count as f64 * (node_count as f64 - 1.0))
977    } else {
978        0.0
979    };
980
981    let wcc = weakly_connected_components(imports);
982    let wcc_count = wcc.len();
983    let largest_component = wcc.first().map(|c| c.len()).unwrap_or(0);
984
985    let all_sccs = tarjan_sccs(imports);
986    let scc_count = all_sccs.len();
987    let mut sccs = find_sccs(imports);
988    let nontrivial_scc_count = sccs.len();
989
990    let mut diamonds = find_diamonds(
991        imports,
992        if limit == usize::MAX {
993            usize::MAX
994        } else {
995            limit * 10
996        },
997    );
998    let diamond_count = diamonds.len();
999
1000    let bridges = find_bridges(imports);
1001    let bridge_count = bridges.len();
1002
1003    let mut longest_chains = find_longest_chains(
1004        imports,
1005        if limit == usize::MAX {
1006            usize::MAX
1007        } else {
1008            limit
1009        },
1010    );
1011    let max_chain_depth = longest_chains.first().map(|c| c.depth).unwrap_or(0);
1012    let chain_count = longest_chains.len();
1013
1014    let transitive_edge_count = count_transitive_edges(imports);
1015    let mut transitive_edges = find_transitive_edges(
1016        imports,
1017        if limit == usize::MAX {
1018            usize::MAX
1019        } else {
1020            limit
1021        },
1022    );
1023
1024    let mut dead_nodes = find_dead_nodes(imports);
1025    let dead_node_count = dead_nodes.len();
1026
1027    // Apply limits
1028    sccs.truncate(limit);
1029    diamonds.truncate(limit);
1030    longest_chains.truncate(limit);
1031    transitive_edges.truncate(limit);
1032    dead_nodes.truncate(limit);
1033
1034    let stats = GraphStats {
1035        nodes: node_count,
1036        edges,
1037        density,
1038        weakly_connected_components: wcc_count,
1039        largest_component_size: largest_component,
1040        scc_count,
1041        nontrivial_scc_count,
1042        diamond_count,
1043        bridge_count,
1044        transitive_edge_count,
1045        max_chain_depth,
1046        chain_count,
1047        dead_node_count,
1048    };
1049
1050    GraphReport {
1051        target,
1052        stats,
1053        sccs,
1054        diamonds,
1055        bridges,
1056        longest_chains,
1057        transitive_edges,
1058        dead_nodes,
1059    }
1060}
1061
1062// ---------------------------------------------------------------------------
1063// Output formatting
1064// ---------------------------------------------------------------------------
1065
1066fn truncate_path(path: &str, max_len: usize) -> String {
1067    if path.len() <= max_len {
1068        path.to_string()
1069    } else {
1070        // Use char_indices to find a safe character boundary, avoiding a byte-index
1071        // slice into a multi-byte UTF-8 sequence which would panic.
1072        let suffix = path
1073            .char_indices()
1074            .rev()
1075            .find(|(i, _)| path.len() - i <= max_len - 3)
1076            .map(|(i, _)| &path[i..])
1077            .unwrap_or(path);
1078        format!("...{}", suffix)
1079    }
1080}
1081
1082impl OutputFormatter for GraphReport {
1083    fn format_text(&self) -> String {
1084        let mut out = Vec::new();
1085        let s = &self.stats;
1086
1087        let label = match self.target {
1088            GraphTarget::Modules => "Module graph",
1089            GraphTarget::Symbols => "Symbol graph",
1090            GraphTarget::Types => "Type graph",
1091        };
1092        out.push(format!(
1093            "# {} — {} nodes, {} edges, density {:.3}",
1094            label, s.nodes, s.edges, s.density
1095        ));
1096        out.push(format!(
1097            "  {} weakly connected components (largest: {})",
1098            s.weakly_connected_components, s.largest_component_size
1099        ));
1100        out.push(format!(
1101            "  {} circular-dependency clusters, {} diamonds, {} bridges, {} transitive edges",
1102            s.nontrivial_scc_count, s.diamond_count, s.bridge_count, s.transitive_edge_count
1103        ));
1104        if s.max_chain_depth > 0 {
1105            out.push(format!(
1106                "  max chain depth {}, {} deep chains (depth > 2)",
1107                s.max_chain_depth, s.chain_count
1108            ));
1109        }
1110        if s.dead_node_count > 0 {
1111            out.push(format!(
1112                "  {} unreferenced nodes (no inbound edges)",
1113                s.dead_node_count
1114            ));
1115        }
1116        out.push(String::new());
1117
1118        if s.nodes == 0 {
1119            out.push("No data found. Run `normalize structure rebuild` first.".to_string());
1120            return out.join("\n");
1121        }
1122
1123        // SCCs
1124        if !self.sccs.is_empty() {
1125            out.push(format!(
1126                "## Circular dependency clusters ({} SCCs)",
1127                self.sccs.len()
1128            ));
1129            for scc in &self.sccs {
1130                let modules: Vec<String> =
1131                    scc.modules.iter().map(|m| truncate_path(m, 40)).collect();
1132                out.push(format!(
1133                    "  [{} modules, {} edges] {}",
1134                    scc.modules.len(),
1135                    scc.internal_edges,
1136                    modules.join(", ")
1137                ));
1138            }
1139            out.push(String::new());
1140        }
1141
1142        // Diamonds
1143        if !self.diamonds.is_empty() {
1144            out.push(format!(
1145                "## Diamond dependencies ({} found)",
1146                self.stats.diamond_count
1147            ));
1148            for d in &self.diamonds {
1149                out.push(format!(
1150                    "  {} → {{{}, {}}} → {}",
1151                    truncate_path(&d.source, 30),
1152                    truncate_path(&d.left, 25),
1153                    truncate_path(&d.right, 25),
1154                    truncate_path(&d.target, 30),
1155                ));
1156            }
1157            out.push(String::new());
1158        }
1159
1160        // Bridges
1161        if !self.bridges.is_empty() {
1162            out.push(format!(
1163                "## Bridge edges ({} critical dependencies)",
1164                self.bridges.len()
1165            ));
1166            for b in &self.bridges {
1167                out.push(format!(
1168                    "  {} → {}",
1169                    truncate_path(&b.from, 40),
1170                    truncate_path(&b.to, 40),
1171                ));
1172            }
1173            out.push(String::new());
1174        }
1175
1176        // Longest chains
1177        if !self.longest_chains.is_empty() {
1178            out.push(format!(
1179                "## Deep import chains ({}, max depth {})",
1180                self.longest_chains.len(),
1181                self.stats.max_chain_depth
1182            ));
1183            for chain in &self.longest_chains {
1184                let short_modules: Vec<String> =
1185                    chain.modules.iter().map(|m| truncate_path(m, 30)).collect();
1186                out.push(format!(
1187                    "  [depth {}] {}",
1188                    chain.depth,
1189                    short_modules.join(" → ")
1190                ));
1191            }
1192            out.push(String::new());
1193        }
1194
1195        // Transitive edges
1196        if !self.transitive_edges.is_empty() {
1197            let showing = if self.transitive_edges.len() < self.stats.transitive_edge_count {
1198                format!(" (showing {})", self.transitive_edges.len())
1199            } else {
1200                String::new()
1201            };
1202            out.push(format!(
1203                "## Transitive edges ({} redundant{})",
1204                self.stats.transitive_edge_count, showing
1205            ));
1206            for te in &self.transitive_edges {
1207                out.push(format!(
1208                    "  {} → {}  (via {})",
1209                    truncate_path(&te.from, 30),
1210                    truncate_path(&te.to, 30),
1211                    truncate_path(&te.via, 30),
1212                ));
1213            }
1214            out.push(String::new());
1215        }
1216
1217        // Dead nodes (no inbound edges)
1218        if !self.dead_nodes.is_empty() {
1219            let label = match self.target {
1220                GraphTarget::Modules => "Unreferenced modules",
1221                GraphTarget::Symbols => "Uncalled symbols",
1222                GraphTarget::Types => "Unreferenced types",
1223            };
1224            out.push(format!(
1225                "## {} ({} nodes with no inbound edges)",
1226                label,
1227                self.dead_nodes.len()
1228            ));
1229            for node in &self.dead_nodes {
1230                out.push(format!("  {}", node));
1231            }
1232            out.push(String::new());
1233        }
1234
1235        out.join("\n")
1236    }
1237
1238    fn format_pretty(&self) -> String {
1239        let mut out = Vec::new();
1240        let s = &self.stats;
1241
1242        let label = match self.target {
1243            GraphTarget::Modules => "Module graph",
1244            GraphTarget::Symbols => "Symbol graph",
1245            GraphTarget::Types => "Type graph",
1246        };
1247        out.push(format!(
1248            "\x1b[1;36m# {}\x1b[0m — \x1b[1m{}\x1b[0m nodes, \x1b[1m{}\x1b[0m edges, density \x1b[33m{:.3}\x1b[0m",
1249            label, s.nodes, s.edges, s.density
1250        ));
1251        out.push(format!(
1252            "  \x1b[32m{}\x1b[0m weakly connected components (largest: \x1b[1m{}\x1b[0m)",
1253            s.weakly_connected_components, s.largest_component_size
1254        ));
1255
1256        let scc_color = if s.nontrivial_scc_count > 0 {
1257            "\x1b[1;31m"
1258        } else {
1259            "\x1b[32m"
1260        };
1261        let diamond_color = if s.diamond_count > 0 {
1262            "\x1b[33m"
1263        } else {
1264            "\x1b[32m"
1265        };
1266        let bridge_color = if s.bridge_count > 0 {
1267            "\x1b[1;33m"
1268        } else {
1269            "\x1b[32m"
1270        };
1271        let trans_color = if s.transitive_edge_count > 0 {
1272            "\x1b[33m"
1273        } else {
1274            "\x1b[32m"
1275        };
1276
1277        out.push(format!(
1278            "  {}{}\x1b[0m circular-dependency clusters, {}{}\x1b[0m diamonds, {}{}\x1b[0m bridges, {}{}\x1b[0m transitive edges",
1279            scc_color, s.nontrivial_scc_count,
1280            diamond_color, s.diamond_count,
1281            bridge_color, s.bridge_count,
1282            trans_color, s.transitive_edge_count,
1283        ));
1284        if s.max_chain_depth > 0 {
1285            let depth_color = if s.max_chain_depth >= 5 {
1286                "\x1b[1;31m"
1287            } else if s.max_chain_depth >= 3 {
1288                "\x1b[33m"
1289            } else {
1290                "\x1b[32m"
1291            };
1292            out.push(format!(
1293                "  max chain depth {}{}\x1b[0m, {} deep chains (depth > 2)",
1294                depth_color, s.max_chain_depth, s.chain_count
1295            ));
1296        }
1297        if s.dead_node_count > 0 {
1298            out.push(format!(
1299                "  \x1b[2m{} unreferenced nodes (no inbound edges)\x1b[0m",
1300                s.dead_node_count
1301            ));
1302        }
1303        out.push(String::new());
1304
1305        if s.nodes == 0 {
1306            out.push("No data found. Run `normalize structure rebuild` first.".to_string());
1307            return out.join("\n");
1308        }
1309
1310        // SCCs
1311        if !self.sccs.is_empty() {
1312            out.push(format!(
1313                "\x1b[1;31m## Circular dependency clusters ({} SCCs)\x1b[0m",
1314                self.sccs.len()
1315            ));
1316            for scc in &self.sccs {
1317                let modules: Vec<String> =
1318                    scc.modules.iter().map(|m| truncate_path(m, 40)).collect();
1319                out.push(format!(
1320                    "  \x1b[31m[{} modules, {} edges]\x1b[0m {}",
1321                    scc.modules.len(),
1322                    scc.internal_edges,
1323                    modules.join(", ")
1324                ));
1325            }
1326            out.push(String::new());
1327        }
1328
1329        // Diamonds
1330        if !self.diamonds.is_empty() {
1331            out.push(format!(
1332                "\x1b[1;33m## Diamond dependencies ({} found)\x1b[0m",
1333                self.stats.diamond_count
1334            ));
1335            for d in &self.diamonds {
1336                out.push(format!(
1337                    "  {} \x1b[33m→\x1b[0m {{{}, {}}} \x1b[33m→\x1b[0m {}",
1338                    truncate_path(&d.source, 30),
1339                    truncate_path(&d.left, 25),
1340                    truncate_path(&d.right, 25),
1341                    truncate_path(&d.target, 30),
1342                ));
1343            }
1344            out.push(String::new());
1345        }
1346
1347        // Bridges
1348        if !self.bridges.is_empty() {
1349            out.push(format!(
1350                "\x1b[1;33m## Bridge edges ({} critical dependencies)\x1b[0m",
1351                self.bridges.len()
1352            ));
1353            for b in &self.bridges {
1354                out.push(format!(
1355                    "  {} \x1b[1;33m→\x1b[0m {}",
1356                    truncate_path(&b.from, 40),
1357                    truncate_path(&b.to, 40),
1358                ));
1359            }
1360            out.push(String::new());
1361        }
1362
1363        // Longest chains
1364        if !self.longest_chains.is_empty() {
1365            out.push(format!(
1366                "\x1b[1m## Deep import chains ({}, max depth {})\x1b[0m",
1367                self.longest_chains.len(),
1368                self.stats.max_chain_depth
1369            ));
1370            for chain in &self.longest_chains {
1371                let short_modules: Vec<String> =
1372                    chain.modules.iter().map(|m| truncate_path(m, 30)).collect();
1373                let depth_color = if chain.depth >= 5 {
1374                    "\x1b[1;31m"
1375                } else if chain.depth >= 3 {
1376                    "\x1b[33m"
1377                } else {
1378                    "\x1b[32m"
1379                };
1380                out.push(format!(
1381                    "  {}[depth {}]\x1b[0m {}",
1382                    depth_color,
1383                    chain.depth,
1384                    short_modules.join(" \x1b[2m→\x1b[0m ")
1385                ));
1386            }
1387            out.push(String::new());
1388        }
1389
1390        // Transitive edges
1391        if !self.transitive_edges.is_empty() {
1392            let showing = if self.transitive_edges.len() < self.stats.transitive_edge_count {
1393                format!(" (showing {})", self.transitive_edges.len())
1394            } else {
1395                String::new()
1396            };
1397            out.push(format!(
1398                "\x1b[33m## Transitive edges ({} redundant{})\x1b[0m",
1399                self.stats.transitive_edge_count, showing
1400            ));
1401            for te in &self.transitive_edges {
1402                out.push(format!(
1403                    "  {} → {}  \x1b[2m(via {})\x1b[0m",
1404                    truncate_path(&te.from, 30),
1405                    truncate_path(&te.to, 30),
1406                    truncate_path(&te.via, 30),
1407                ));
1408            }
1409            out.push(String::new());
1410        }
1411
1412        // Dead nodes (no inbound edges)
1413        if !self.dead_nodes.is_empty() {
1414            let label = match self.target {
1415                GraphTarget::Modules => "Unreferenced modules",
1416                GraphTarget::Symbols => "Uncalled symbols",
1417                GraphTarget::Types => "Unreferenced types",
1418            };
1419            out.push(format!(
1420                "\x1b[2m## {} ({} with no inbound edges)\x1b[0m",
1421                label,
1422                self.dead_nodes.len()
1423            ));
1424            for node in &self.dead_nodes {
1425                out.push(format!("  \x1b[2m{}\x1b[0m", node));
1426            }
1427            out.push(String::new());
1428        }
1429
1430        out.join("\n")
1431    }
1432}