Skip to main content

arbor_graph/
builder.rs

//! Graph builder for constructing the code graph from parsed nodes.
//!
//! Two-pass construction:
//!   1. Add all nodes — populates symbol table and import map
//!   2. Resolve edges — uses import context to create accurate edges
6
7use crate::edge::{Edge, EdgeKind};
8use crate::graph::{ArborGraph, NodeId};
9use crate::symbol_table::SymbolTable;
10use arbor_core::{CodeNode, NodeKind};
11use std::collections::HashMap;
12use std::path::PathBuf;
13use tracing::warn;
14
15/// Builds an ArborGraph from parsed code nodes.
16pub struct GraphBuilder {
17    graph: ArborGraph,
18    symbol_table: SymbolTable,
19    name_to_id: HashMap<String, String>,
20
21    /// Per-file map of locally-bound name → source module specifier.
22    /// Built from Import nodes that carry their imported names in `references`.
23    ///
24    /// Example:
25    ///   `import { validate } from '@babel/types'`
26    ///   → import_map["file.ts"]["validate"] = "@babel/types"
27    ///
28    /// Used during edge resolution to verify that a direct call like `validate()`
29    /// is indeed an intentional import, not a same-name coincidence.
30    import_map: HashMap<String, HashMap<String, String>>,
31
32    /// Namespace import aliases: file → alias → source module.
33    ///
34    /// Example:
35    ///   `import * as types from '@babel/types'`
36    ///   → namespace_imports["file.ts"]["types"] = "@babel/types"
37    ///
38    /// Used to resolve calls like `types.validate()` — though since we now DROP
39    /// those calls at parse time, this is reserved for future use when we add
40    /// a richer call-site representation.
41    namespace_imports: HashMap<String, HashMap<String, String>>,
42}
43
44impl Default for GraphBuilder {
45    fn default() -> Self {
46        Self::new()
47    }
48}
49
50impl GraphBuilder {
51    pub fn new() -> Self {
52        Self {
53            graph: ArborGraph::new(),
54            symbol_table: SymbolTable::new(),
55            name_to_id: HashMap::new(),
56            import_map: HashMap::new(),
57            namespace_imports: HashMap::new(),
58        }
59    }
60
61    /// Adds nodes from a parsed file to the graph.
62    ///
63    /// This pass does two things:
64    ///   - Adds real code entities (functions, classes, etc.) to the graph and symbol table
65    ///   - Processes Import nodes to build the per-file import map
66    ///
67    /// Call this for all files before calling `build()`.
68    pub fn add_nodes(&mut self, nodes: Vec<CodeNode>) {
69        for node in nodes {
70            // Import nodes carry the import map — process them but don't add to graph
71            // (they are not code entities we want in centrality analysis)
72            if node.kind == NodeKind::Import {
73                let file = node.file.clone();
74                let module = node.name.clone();
75
76                for imported_name in &node.references {
77                    if let Some(alias) = imported_name.strip_prefix("*as:") {
78                        // `import * as alias from 'module'`
79                        self.namespace_imports
80                            .entry(file.clone())
81                            .or_default()
82                            .insert(alias.to_string(), module.clone());
83                    } else {
84                        // `import { name } from 'module'` or `import DefaultName from 'module'`
85                        self.import_map
86                            .entry(file.clone())
87                            .or_default()
88                            .insert(imported_name.clone(), module.clone());
89                    }
90                }
91                // Import nodes are intentionally NOT added to the graph.
92                // They caused misleading centrality scores (e.g. "psycopg.types.range [Import]
93                // 330 callers") because every call to a symbol named after the import path
94                // was attributed to the import node itself.
95                continue;
96            }
97
98            let id_str = node.id.clone();
99            let name = node.name.clone();
100            let qualified = node.qualified_name.clone();
101            let file = PathBuf::from(&node.file);
102
103            let node_idx = self.graph.add_node(node);
104
105            if !qualified.is_empty() {
106                self.symbol_table
107                    .insert(qualified.clone(), node_idx, file.clone());
108            }
109
110            self.name_to_id.insert(name.clone(), id_str.clone());
111            self.name_to_id.insert(qualified, id_str);
112        }
113    }
114
115    /// Resolves references into actual graph edges.
116    ///
117    /// Resolution order for each reference `R` from file `F`:
118    ///   1. Exact FQN match in symbol table
119    ///   2. Context-aware suffix match (prefers same directory, avoids ambiguity)
120    ///   3. Import-validated match — if R is in F's import map AND a match was found
121    ///      in step 2 for a different file, we skip it to avoid wrong-module edges
122    ///
123    /// References that cannot be resolved are silently dropped (they are external/stdlib
124    /// symbols with no definition in this repository).
125    pub fn resolve_edges(&mut self) {
126        let mut edges_to_add: Vec<(NodeId, NodeId, String)> = Vec::new();
127
128        let node_indices: Vec<NodeId> = self.graph.node_indexes().collect();
129
130        for from_idx in node_indices {
131            let (references, from_file) = {
132                let node = self.graph.get(from_idx).unwrap();
133                (node.references.clone(), PathBuf::from(&node.file))
134            };
135
136            let from_file_str = from_file.to_string_lossy().to_string();
137
138            for reference in references {
139                // 1. Exact FQN match
140                if let Some(to_idx) = self.symbol_table.resolve(&reference) {
141                    if from_idx != to_idx {
142                        edges_to_add.push((from_idx, to_idx, reference.clone()));
143                    }
144                    continue;
145                }
146
147                // 2. Context-aware suffix match
148                if let Some(to_idx) = self
149                    .symbol_table
150                    .resolve_with_context(&reference, &from_file)
151                {
152                    if from_idx == to_idx {
153                        continue;
154                    }
155
156                    // 3. Import-validation filter
157                    //
158                    // If this file has an explicit import map AND the reference is NOT
159                    // in it, the suffix match may have found a wrong-file coincidence.
160                    // Only apply this filter when the file has import data (not all parsers
161                    // provide it yet) and the reference is a simple name (no dots).
162                    //
163                    // Skip if: the file has imports, the name is NOT imported, and the
164                    // matched node is in a completely different part of the tree.
165                    // This prevents `validate()` in file X from linking to `validate` in
166                    // an unrelated module when `validate` is not imported.
167                    if let Some(file_imports) = self.import_map.get(&from_file_str) {
168                        if !file_imports.is_empty()
169                            && !reference.contains('.')
170                            && !file_imports.contains_key(&reference)
171                        {
172                            // Not imported explicitly — only allow if in same file or same dir
173                            let to_node = self.graph.get(to_idx).unwrap();
174                            let to_file = PathBuf::from(&to_node.file);
175                            let same_file = to_file == from_file;
176                            let same_dir = to_file.parent() == from_file.parent();
177                            if !same_file && !same_dir {
178                                warn!(
179                                    "Skipping unimported cross-module reference '{}' in {} → {}",
180                                    reference,
181                                    from_file.display(),
182                                    to_file.display()
183                                );
184                                continue;
185                            }
186                        }
187                    }
188
189                    edges_to_add.push((from_idx, to_idx, reference.clone()));
190                    continue;
191                }
192
193                // Unresolved: external/stdlib symbol — silently drop (expected)
194                #[cfg(debug_assertions)]
195                warn!(
196                    "Unresolved reference '{}' in {} (likely external/stdlib)",
197                    reference,
198                    from_file.display()
199                );
200            }
201        }
202
203        for (from_id, to_id, _) in edges_to_add {
204            self.graph
205                .add_edge(from_id, to_id, Edge::new(EdgeKind::Calls));
206        }
207    }
208
209    /// Finishes building and returns the graph.
210    pub fn build(mut self) -> ArborGraph {
211        self.resolve_edges();
212        self.graph
213    }
214
215    /// Builds without resolving edges (for incremental updates).
216    pub fn build_without_resolve(self) -> ArborGraph {
217        self.graph
218    }
219}
220
#[cfg(test)]
mod tests {
    use super::*;
    use arbor_core::NodeKind;

    /// Two plain function nodes end up as two graph nodes.
    #[test]
    fn test_builder_adds_nodes() {
        let foo = CodeNode::new("foo", "foo", NodeKind::Function, "test.rs");
        let bar = CodeNode::new("bar", "bar", NodeKind::Function, "test.rs");

        let mut gb = GraphBuilder::new();
        gb.add_nodes(vec![foo, bar]);

        let built = gb.build();
        assert_eq!(built.node_count(), 2);
    }

    /// A reference to a node in the same file becomes exactly one edge.
    #[test]
    fn test_builder_resolves_edges() {
        let callee = CodeNode::new("callee", "callee", NodeKind::Function, "test.rs");
        let caller = CodeNode::new("caller", "caller", NodeKind::Function, "test.rs")
            .with_references(vec!["callee".to_string()]);

        let mut gb = GraphBuilder::new();
        gb.add_nodes(vec![caller, callee]);

        let built = gb.build();
        assert_eq!(built.node_count(), 2);
        assert_eq!(built.edge_count(), 1);
    }

    /// A fully-qualified reference resolves across files and across separate
    /// `add_nodes` calls.
    #[test]
    fn test_cross_file_resolution() {
        let main_fn = CodeNode::new("main", "main", NodeKind::Function, "main.rs")
            .with_references(vec!["pkg.Utils.helper".to_string()]);
        let mut helper = CodeNode::new("helper", "helper", NodeKind::Method, "utils.rs");
        helper.qualified_name = "pkg.Utils.helper".to_string();

        let mut gb = GraphBuilder::new();
        gb.add_nodes(vec![main_fn]);
        gb.add_nodes(vec![helper]);

        let built = gb.build();
        assert_eq!(built.node_count(), 2);
        assert_eq!(
            built.edge_count(),
            1,
            "Should resolve cross-file edge via FQN"
        );
    }

    /// A reference with no matching definition yields no edge.
    #[test]
    fn test_unresolved_references_no_false_edges() {
        let lonely = CodeNode::new("caller", "caller", NodeKind::Function, "a.rs")
            .with_references(vec!["nonexistent_function".to_string()]);

        let mut gb = GraphBuilder::new();
        gb.add_nodes(vec![lonely]);

        let built = gb.build();
        assert_eq!(built.node_count(), 1);
        assert_eq!(
            built.edge_count(),
            0,
            "Unresolved references must not create edges"
        );
    }

    /// Import nodes feed the import map only; they never become graph nodes.
    #[test]
    fn test_import_nodes_not_added_to_graph() {
        let import = CodeNode::new("./utils", "./utils", NodeKind::Import, "main.ts")
            .with_references(vec!["validate".to_string()]);
        let main_fn = CodeNode::new("main", "main", NodeKind::Function, "main.ts");

        let mut gb = GraphBuilder::new();
        gb.add_nodes(vec![import, main_fn]);

        // The function is the sole graph node; the import was consumed.
        let built = gb.build();
        assert_eq!(built.node_count(), 1);
    }

    /// Each named import is recorded under the importing file with its module.
    #[test]
    fn test_import_map_built_correctly() {
        let import = CodeNode::new("@babel/types", "@babel/types", NodeKind::Import, "file.ts")
            .with_references(vec!["validate".to_string(), "clone".to_string()]);

        let mut gb = GraphBuilder::new();
        gb.add_nodes(vec![import]);

        let expected = "@babel/types".to_string();
        let module_of = |name: &str| gb.import_map.get("file.ts").and_then(|m| m.get(name));
        assert_eq!(module_of("validate"), Some(&expected));
        assert_eq!(module_of("clone"), Some(&expected));
    }

    /// A `*as:`-prefixed reference is recorded as a namespace alias.
    #[test]
    fn test_namespace_import_map() {
        let import = CodeNode::new("@babel/types", "@babel/types", NodeKind::Import, "file.ts")
            .with_references(vec!["*as:types".to_string()]);

        let mut gb = GraphBuilder::new();
        gb.add_nodes(vec![import]);

        let recorded = gb
            .namespace_imports
            .get("file.ts")
            .and_then(|aliases| aliases.get("types"));
        assert_eq!(recorded, Some(&"@babel/types".to_string()));
    }

    /// Building with no input gives an empty graph.
    #[test]
    fn test_build_empty_graph() {
        let built = GraphBuilder::new().build();
        assert_eq!(built.node_count(), 0);
        assert_eq!(built.edge_count(), 0);
    }

    /// Regression: `MathUtils.add()` called from Calc must link to MathUtils.add
    /// and NOT to an `add` with the same bare name on a sibling class. This
    /// depends on the parser preserving the class qualifier so the builder
    /// finds an exact FQN match.
    #[test]
    fn test_qualified_static_call_resolves_to_correct_class() {
        let caller = CodeNode::new("compute", "Calc.compute", NodeKind::Method, "src/Calc.java")
            .with_references(vec!["MathUtils.add".to_string()]);
        // Lives next to the caller and collides on the bare name — the old bug
        // produced an edge to this node.
        let sibling = CodeNode::new("add", "Sibling.add", NodeKind::Method, "src/Sibling.java");
        let target = CodeNode::new(
            "add",
            "MathUtils.add",
            NodeKind::Method,
            "src/util/MathUtils.java",
        );

        let mut gb = GraphBuilder::new();
        gb.add_nodes(vec![caller, sibling, target]);
        let built = gb.build();

        let compute_idx = built
            .node_indexes()
            .find(|&idx| built.get(idx).unwrap().name == "compute")
            .unwrap();
        let callees = built.get_callees(compute_idx);
        assert_eq!(callees.len(), 1, "exactly one resolved callee");
        assert_eq!(
            callees[0].qualified_name, "MathUtils.add",
            "static call must resolve to the qualified class, not a same-named sibling"
        );
    }
}