Skip to main content

chainsaw/
graph.rs

1//! Dependency graph data structures.
2//!
3//! A [`ModuleGraph`] is a directed graph of source files (modules) connected by
4//! import edges. Nodes are dense `u32`-indexed [`ModuleId`]s, edges carry an
5//! [`EdgeKind`] distinguishing static, dynamic, and type-only imports.
6
7use serde::{Deserialize, Serialize};
8use std::collections::{HashMap, VecDeque};
9use std::path::PathBuf;
10
11/// Dense index into [`ModuleGraph::modules`].
12#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
13#[repr(transparent)]
14pub struct ModuleId(pub u32);
15
16/// Dense index into [`ModuleGraph::edges`].
17#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
18#[repr(transparent)]
19pub struct EdgeId(pub u32);
20
21/// How an import was classified during parsing.
22#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
23#[non_exhaustive]
24pub enum EdgeKind {
25    /// Top-level `import`/`require`/`from ... import` that runs at module load.
26    Static,
27    /// `import()` expression, `importlib.import_module`, or import inside a function body.
28    Dynamic,
29    /// TypeScript `import type` or Python import inside `if TYPE_CHECKING`.
30    TypeOnly,
31}
32
33/// A single source file in the dependency graph.
34#[derive(Debug, Clone, Serialize, Deserialize)]
35#[non_exhaustive]
36pub struct Module {
37    pub id: ModuleId,
38    pub path: PathBuf,
39    pub size_bytes: u64,
40    /// None for source files, Some("package-name") for `node_modules`
41    pub package: Option<String>,
42}
43
44/// A directed import edge between two modules.
45#[derive(Debug, Clone, Serialize, Deserialize)]
46#[non_exhaustive]
47pub struct Edge {
48    pub id: EdgeId,
49    pub from: ModuleId,
50    pub to: ModuleId,
51    pub kind: EdgeKind,
52    /// The raw import specifier (e.g. "./foo", "@aws-sdk/client-bedrock")
53    pub specifier: String,
54}
55
56/// Aggregated size and file count for a third-party package.
57#[derive(Debug, Clone, Serialize, Deserialize)]
58pub struct PackageInfo {
59    pub name: String,
60    pub entry_module: ModuleId,
61    pub total_reachable_size: u64,
62    pub total_reachable_files: u32,
63}
64
65/// A directed graph of modules connected by import edges.
66#[derive(Debug, Clone, Serialize, Deserialize)]
67#[non_exhaustive]
68pub struct ModuleGraph {
69    pub modules: Vec<Module>,
70    pub edges: Vec<Edge>,
71    /// Outgoing edges per module (indexed by `ModuleId`)
72    pub forward_adj: Vec<Vec<EdgeId>>,
73    pub path_to_id: HashMap<PathBuf, ModuleId>,
74    pub package_map: HashMap<String, PackageInfo>,
75}
76
77impl Default for ModuleGraph {
78    fn default() -> Self {
79        Self::new()
80    }
81}
82
83impl ModuleGraph {
84    pub fn new() -> Self {
85        Self {
86            modules: Vec::new(),
87            edges: Vec::new(),
88            forward_adj: Vec::new(),
89            path_to_id: HashMap::new(),
90            package_map: HashMap::new(),
91        }
92    }
93
94    #[allow(clippy::cast_possible_truncation)]
95    pub fn add_module(&mut self, path: PathBuf, size_bytes: u64, package: Option<String>) -> ModuleId {
96        if let Some(&id) = self.path_to_id.get(&path) {
97            return id;
98        }
99        let id = ModuleId(self.modules.len() as u32);
100        self.modules.push(Module {
101            id,
102            path: path.clone(),
103            size_bytes,
104            package,
105        });
106        self.forward_adj.push(Vec::new());
107        self.path_to_id.insert(path, id);
108        id
109    }
110
111    #[allow(clippy::cast_possible_truncation)]
112    pub fn add_edge(&mut self, from: ModuleId, to: ModuleId, kind: EdgeKind, specifier: &str) -> EdgeId {
113        // Deduplicate by (from, to, kind) — scan outgoing edges (typically <30)
114        if let Some(&existing) = self.forward_adj[from.0 as usize]
115            .iter()
116            .find(|&&eid| {
117                let e = &self.edges[eid.0 as usize];
118                e.to == to && e.kind == kind
119            })
120        {
121            return existing;
122        }
123        let id = EdgeId(self.edges.len() as u32);
124        self.edges.push(Edge {
125            id,
126            from,
127            to,
128            kind,
129            specifier: specifier.to_owned(),
130        });
131        self.forward_adj[from.0 as usize].push(id);
132        id
133    }
134
135    pub fn module(&self, id: ModuleId) -> &Module {
136        &self.modules[id.0 as usize]
137    }
138
139    pub fn edge(&self, id: EdgeId) -> &Edge {
140        &self.edges[id.0 as usize]
141    }
142
143    pub fn outgoing_edges(&self, id: ModuleId) -> &[EdgeId] {
144        &self.forward_adj[id.0 as usize]
145    }
146
147    pub fn module_count(&self) -> usize {
148        self.modules.len()
149    }
150
151    /// Compute aggregated package info (total reachable size + file count).
152    /// For each package, BFS from its entry module following only edges within the same package.
153    pub fn compute_package_info(&mut self) {
154        let mut package_entries: HashMap<String, Vec<ModuleId>> = HashMap::new();
155        for module in &self.modules {
156            if let Some(ref pkg) = module.package {
157                package_entries.entry(pkg.clone()).or_default().push(module.id);
158            }
159        }
160
161        let num_modules = self.modules.len();
162        for (pkg_name, module_ids) in package_entries {
163            let mut total_size: u64 = 0;
164            let mut total_files: u32 = 0;
165            let mut visited = vec![false; num_modules];
166
167            let mut queue: VecDeque<ModuleId> = module_ids.iter().copied().collect();
168            for &id in &module_ids {
169                visited[id.0 as usize] = true;
170            }
171
172            while let Some(mid) = queue.pop_front() {
173                let module = &self.modules[mid.0 as usize];
174                if module.package.as_deref() == Some(pkg_name.as_str()) {
175                    total_size += module.size_bytes;
176                    total_files += 1;
177                }
178
179                for &edge_id in &self.forward_adj[mid.0 as usize] {
180                    let edge = &self.edges[edge_id.0 as usize];
181                    if edge.kind == EdgeKind::Static {
182                        let target = &self.modules[edge.to.0 as usize];
183                        if target.package.as_deref() == Some(pkg_name.as_str())
184                            && !visited[edge.to.0 as usize]
185                        {
186                            visited[edge.to.0 as usize] = true;
187                            queue.push_back(edge.to);
188                        }
189                    }
190                }
191            }
192
193            let entry_module = module_ids[0];
194            let info = PackageInfo {
195                name: pkg_name.clone(),
196                entry_module,
197                total_reachable_size: total_size,
198                total_reachable_files: total_files,
199            };
200            self.package_map.insert(pkg_name, info);
201        }
202    }
203}
204
205#[cfg(test)]
206mod tests {
207    use super::*;
208
209    #[test]
210    fn add_edge_deduplicates_same_from_to_kind() {
211        let mut g = ModuleGraph::new();
212        let a = g.add_module("a.ts".into(), 100, None);
213        let b = g.add_module("b.ts".into(), 200, None);
214
215        // Add same edge three times (simulating symlink-resolved duplicates)
216        g.add_edge(a, b, EdgeKind::Static, "./b");
217        g.add_edge(a, b, EdgeKind::Static, "./link-to-b");
218        g.add_edge(a, b, EdgeKind::Static, "./double-link");
219
220        // Should have only one edge (deduped by from+to+kind)
221        assert_eq!(g.edges.len(), 1, "duplicate edges should be deduped");
222        assert_eq!(g.forward_adj[a.0 as usize].len(), 1);
223    }
224
225    #[test]
226    fn add_edge_allows_different_kinds() {
227        let mut g = ModuleGraph::new();
228        let a = g.add_module("a.ts".into(), 100, None);
229        let b = g.add_module("b.ts".into(), 200, None);
230
231        // Static and Dynamic edges between same nodes should both exist
232        g.add_edge(a, b, EdgeKind::Static, "./b");
233        g.add_edge(a, b, EdgeKind::Dynamic, "./b");
234
235        assert_eq!(g.edges.len(), 2, "different edge kinds should not be deduped");
236    }
237}