Skip to main content

chainsaw/
graph.rs

1//! Dependency graph data structures.
2//!
3//! A [`ModuleGraph`] is a directed graph of source files (modules) connected by
4//! import edges. Nodes are dense `u32`-indexed [`ModuleId`]s, edges carry an
5//! [`EdgeKind`] distinguishing static, dynamic, and type-only imports.
6
7use serde::{Deserialize, Serialize};
8use std::collections::{HashMap, VecDeque};
9use std::path::PathBuf;
10
11/// Dense index into [`ModuleGraph::modules`].
12#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
13#[repr(transparent)]
14pub struct ModuleId(pub u32);
15
16/// Dense index into [`ModuleGraph::edges`].
17#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
18#[repr(transparent)]
19pub struct EdgeId(pub u32);
20
21/// How an import was classified during parsing.
22#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
23#[non_exhaustive]
24pub enum EdgeKind {
25    /// Top-level `import`/`require`/`from ... import` that runs at module load.
26    Static,
27    /// `import()` expression, `importlib.import_module`, or import inside a function body.
28    Dynamic,
29    /// TypeScript `import type` or Python import inside `if TYPE_CHECKING`.
30    TypeOnly,
31}
32
33/// A single source file in the dependency graph.
34#[derive(Debug, Clone, Serialize, Deserialize)]
35#[non_exhaustive]
36pub struct Module {
37    pub id: ModuleId,
38    pub path: PathBuf,
39    pub size_bytes: u64,
40    /// None for source files, Some("package-name") for `node_modules`
41    pub package: Option<String>,
42}
43
44/// A directed import edge between two modules.
45#[derive(Debug, Clone, Serialize, Deserialize)]
46#[non_exhaustive]
47pub struct Edge {
48    pub id: EdgeId,
49    pub from: ModuleId,
50    pub to: ModuleId,
51    pub kind: EdgeKind,
52    /// The raw import specifier (e.g. "./foo", "@aws-sdk/client-bedrock")
53    pub specifier: String,
54}
55
56/// Aggregated size and file count for a third-party package.
57#[derive(Debug, Clone, Serialize, Deserialize)]
58pub struct PackageInfo {
59    pub name: String,
60    pub entry_module: ModuleId,
61    pub total_reachable_size: u64,
62    pub total_reachable_files: u32,
63}
64
65/// A directed graph of modules connected by import edges.
66#[derive(Debug, Clone, Serialize, Deserialize)]
67#[non_exhaustive]
68pub struct ModuleGraph {
69    pub modules: Vec<Module>,
70    pub edges: Vec<Edge>,
71    /// Outgoing edges per module (indexed by `ModuleId`)
72    pub forward_adj: Vec<Vec<EdgeId>>,
73    pub path_to_id: HashMap<PathBuf, ModuleId>,
74    pub package_map: HashMap<String, PackageInfo>,
75}
76
77impl Default for ModuleGraph {
78    fn default() -> Self {
79        Self::new()
80    }
81}
82
83impl ModuleGraph {
84    pub fn new() -> Self {
85        Self {
86            modules: Vec::new(),
87            edges: Vec::new(),
88            forward_adj: Vec::new(),
89            path_to_id: HashMap::new(),
90            package_map: HashMap::new(),
91        }
92    }
93
94    #[allow(clippy::cast_possible_truncation)]
95    pub fn add_module(
96        &mut self,
97        path: PathBuf,
98        size_bytes: u64,
99        package: Option<String>,
100    ) -> ModuleId {
101        if let Some(&id) = self.path_to_id.get(&path) {
102            return id;
103        }
104        let id = ModuleId(self.modules.len() as u32);
105        self.modules.push(Module {
106            id,
107            path: path.clone(),
108            size_bytes,
109            package,
110        });
111        self.forward_adj.push(Vec::new());
112        self.path_to_id.insert(path, id);
113        id
114    }
115
116    #[allow(clippy::cast_possible_truncation)]
117    pub fn add_edge(
118        &mut self,
119        from: ModuleId,
120        to: ModuleId,
121        kind: EdgeKind,
122        specifier: &str,
123    ) -> EdgeId {
124        // Deduplicate by (from, to, kind) — scan outgoing edges (typically <30)
125        if let Some(&existing) = self.forward_adj[from.0 as usize].iter().find(|&&eid| {
126            let e = &self.edges[eid.0 as usize];
127            e.to == to && e.kind == kind
128        }) {
129            return existing;
130        }
131        let id = EdgeId(self.edges.len() as u32);
132        self.edges.push(Edge {
133            id,
134            from,
135            to,
136            kind,
137            specifier: specifier.to_owned(),
138        });
139        self.forward_adj[from.0 as usize].push(id);
140        id
141    }
142
143    pub fn module(&self, id: ModuleId) -> &Module {
144        &self.modules[id.0 as usize]
145    }
146
147    pub fn edge(&self, id: EdgeId) -> &Edge {
148        &self.edges[id.0 as usize]
149    }
150
151    pub fn outgoing_edges(&self, id: ModuleId) -> &[EdgeId] {
152        &self.forward_adj[id.0 as usize]
153    }
154
155    pub fn module_count(&self) -> usize {
156        self.modules.len()
157    }
158
159    /// Compute aggregated package info (total reachable size + file count).
160    /// For each package, BFS from its entry module following only edges within the same package.
161    pub fn compute_package_info(&mut self) {
162        let mut package_entries: HashMap<String, Vec<ModuleId>> = HashMap::new();
163        for module in &self.modules {
164            if let Some(ref pkg) = module.package {
165                package_entries
166                    .entry(pkg.clone())
167                    .or_default()
168                    .push(module.id);
169            }
170        }
171
172        let num_modules = self.modules.len();
173        for (pkg_name, module_ids) in package_entries {
174            let mut total_size: u64 = 0;
175            let mut total_files: u32 = 0;
176            let mut visited = vec![false; num_modules];
177
178            let mut queue: VecDeque<ModuleId> = module_ids.iter().copied().collect();
179            for &id in &module_ids {
180                visited[id.0 as usize] = true;
181            }
182
183            while let Some(mid) = queue.pop_front() {
184                let module = &self.modules[mid.0 as usize];
185                if module.package.as_deref() == Some(pkg_name.as_str()) {
186                    total_size += module.size_bytes;
187                    total_files += 1;
188                }
189
190                for &edge_id in &self.forward_adj[mid.0 as usize] {
191                    let edge = &self.edges[edge_id.0 as usize];
192                    if edge.kind == EdgeKind::Static {
193                        let target = &self.modules[edge.to.0 as usize];
194                        if target.package.as_deref() == Some(pkg_name.as_str())
195                            && !visited[edge.to.0 as usize]
196                        {
197                            visited[edge.to.0 as usize] = true;
198                            queue.push_back(edge.to);
199                        }
200                    }
201                }
202            }
203
204            let entry_module = module_ids[0];
205            let info = PackageInfo {
206                name: pkg_name.clone(),
207                entry_module,
208                total_reachable_size: total_size,
209                total_reachable_files: total_files,
210            };
211            self.package_map.insert(pkg_name, info);
212        }
213    }
214}
215
216#[cfg(test)]
217mod tests {
218    use super::*;
219
220    #[test]
221    fn add_edge_deduplicates_same_from_to_kind() {
222        let mut g = ModuleGraph::new();
223        let a = g.add_module("a.ts".into(), 100, None);
224        let b = g.add_module("b.ts".into(), 200, None);
225
226        // Add same edge three times (simulating symlink-resolved duplicates)
227        g.add_edge(a, b, EdgeKind::Static, "./b");
228        g.add_edge(a, b, EdgeKind::Static, "./link-to-b");
229        g.add_edge(a, b, EdgeKind::Static, "./double-link");
230
231        // Should have only one edge (deduped by from+to+kind)
232        assert_eq!(g.edges.len(), 1, "duplicate edges should be deduped");
233        assert_eq!(g.forward_adj[a.0 as usize].len(), 1);
234    }
235
236    #[test]
237    fn add_edge_allows_different_kinds() {
238        let mut g = ModuleGraph::new();
239        let a = g.add_module("a.ts".into(), 100, None);
240        let b = g.add_module("b.ts".into(), 200, None);
241
242        // Static and Dynamic edges between same nodes should both exist
243        g.add_edge(a, b, EdgeKind::Static, "./b");
244        g.add_edge(a, b, EdgeKind::Dynamic, "./b");
245
246        assert_eq!(
247            g.edges.len(),
248            2,
249            "different edge kinds should not be deduped"
250        );
251    }
252}