Skip to main content

cgx_engine/
resolver.rs

1use std::collections::{HashMap, HashSet};
2use std::path::Path;
3
4use crate::parser::{EdgeDef, EdgeKind, NodeDef, NodeKind};
5
/// Returns `true` if the given file path looks like a test file.
///
/// A path is considered a test file when either:
///
/// * one of its directory components is `test`, `tests`, `__tests__` or
///   `spec` — including the leading component of a relative path such as
///   `tests/foo.rs`, or
/// * it ends with a recognised test suffix (`.test.ts`, `.spec.jsx`,
///   `_test.py`, `_test.rs`, ...).
///
/// Matching is case-insensitive and accepts both `/` and `\` as separators.
pub fn is_test_path(path: &str) -> bool {
    const TEST_DIRS: [&str; 4] = ["test", "tests", "__tests__", "spec"];
    const TEST_SUFFIXES: [&str; 10] = [
        ".test.ts",
        ".spec.ts",
        ".test.js",
        ".spec.js",
        ".test.tsx",
        ".spec.tsx",
        ".test.jsx",
        ".spec.jsx",
        "_test.py",
        "_test.rs",
    ];

    // Normalise case and separators so Windows-style paths match too.
    let normalized = path.to_lowercase().replace('\\', "/");

    // Only directory components count: drop the final component (the file
    // name) so a file that is merely *named* `test` does not match.
    let mut components = normalized.split('/');
    components.next_back();
    let in_test_dir = components.any(|dir| TEST_DIRS.contains(&dir));

    in_test_dir
        || TEST_SUFFIXES
            .iter()
            .any(|suffix| normalized.ends_with(*suffix))
}
22
23/// Resolve raw parser edges into fully-qualified node IDs.
24///
25/// Import paths and call targets are matched against the known node set.
26/// `CALLS` edges originating from test files are reclassified as `TESTS`
27/// when the destination is a production symbol.  Unresolvable edges are
28/// kept so later analysis phases can still use them.
29pub fn resolve(
30    nodes: &[NodeDef],
31    edges: &[EdgeDef],
32    _repo_root: &Path,
33) -> anyhow::Result<Vec<EdgeDef>> {
34    let mut resolved_edges: Vec<EdgeDef> = Vec::new();
35
36    // Create a set of all known node IDs for validation
37    let node_ids: HashSet<&str> = nodes.iter().map(|n| n.id.as_str()).collect();
38
39    // Build export index: name -> Vec<node_id>
40    let mut export_index: HashMap<String, Vec<String>> = HashMap::new();
41    for node in nodes {
42        export_index
43            .entry(node.name.clone())
44            .or_default()
45            .push(node.id.clone());
46    }
47
48    // Build file node set from actual files
49    let mut file_paths: HashSet<String> = HashSet::new();
50    for node in nodes {
51        file_paths.insert(node.path.clone());
52    }
53
54    // Create file node IDs we know about
55    let known_file_ids: HashSet<String> =
56        file_paths.iter().map(|p| format!("file:{}", p)).collect();
57
58    // Build a set of test-file paths for CALLS→TESTS reclassification
59    let test_paths: HashSet<&str> = nodes
60        .iter()
61        .filter(|n| is_test_path(&n.path))
62        .map(|n| n.path.as_str())
63        .collect();
64
65    // Also test source IDs (fn:/cls: nodes whose path is a test file)
66    let test_node_ids: HashSet<&str> = nodes
67        .iter()
68        .filter(|n| is_test_path(&n.path))
69        .map(|n| n.id.as_str())
70        .collect();
71    let _ = test_paths; // used via test_node_ids
72
73    // Process all edges
74    for edge in edges {
75        match edge.kind {
76            EdgeKind::Imports => {
77                // src = file:<current_file>, dst = file:<imported_file>
78                // Check if dst is a valid file ID, or try to resolve it
79                let dst_is_valid =
80                    node_ids.contains(edge.dst.as_str()) || known_file_ids.contains(&edge.dst);
81
82                if dst_is_valid {
83                    resolved_edges.push(EdgeDef {
84                        src: edge.src.clone(),
85                        dst: edge.dst.clone(),
86                        kind: EdgeKind::Imports,
87                        ..Default::default()
88                    });
89                } else {
90                    // Try with different extensions
91                    let import_target = edge.dst.trim_start_matches("file:");
92                    let mut found = false;
93                    for ext in &[".ts", ".tsx", ".js", ".jsx", ".py", ".rs"] {
94                        let alt = format!("file:{}{}", import_target, ext);
95                        if known_file_ids.contains(&alt) {
96                            resolved_edges.push(EdgeDef {
97                                src: edge.src.clone(),
98                                dst: alt,
99                                kind: EdgeKind::Imports,
100                                ..Default::default()
101                            });
102                            found = true;
103                            break;
104                        }
105                    }
106                    // Try directory index files (Node.js resolution)
107                    if !found {
108                        for index in &["/index.js", "/index.ts", "/index.jsx", "/index.tsx"] {
109                            let alt = format!("file:{}{}", import_target, index);
110                            if known_file_ids.contains(&alt) {
111                                resolved_edges.push(EdgeDef {
112                                    src: edge.src.clone(),
113                                    dst: alt,
114                                    kind: EdgeKind::Imports,
115                                    ..Default::default()
116                                });
117                                found = true;
118                                break;
119                            }
120                        }
121                    }
122                    if !found {
123                        // Include unresolvable imports too (they may still be useful)
124                        resolved_edges.push(edge.clone());
125                    }
126                }
127            }
128            EdgeKind::Exports => {
129                // src = file:<path>, dst = fn:path:name or cls:path:name
130                if node_ids.contains(edge.dst.as_str()) {
131                    resolved_edges.push(edge.clone());
132                } else {
133                    // Keep the edge but log a warning — may be resolved in a later pass
134                    tracing::debug!("Unresolved export edge: {} -> {}", edge.src, edge.dst);
135                    resolved_edges.push(edge.clone());
136                }
137            }
138            EdgeKind::Calls | EdgeKind::Inherits => {
139                // If source node is from a test file and destination is not, use TESTS edge
140                let src_is_test = test_node_ids.contains(edge.src.as_str());
141
142                // If dst is a valid node ID, keep it. Otherwise try to resolve by name.
143                if node_ids.contains(edge.dst.as_str()) {
144                    let dst_is_test = test_node_ids.contains(edge.dst.as_str());
145                    let kind = if src_is_test && !dst_is_test {
146                        EdgeKind::Tests
147                    } else {
148                        edge.kind.clone()
149                    };
150                    resolved_edges.push(EdgeDef {
151                        kind,
152                        ..edge.clone()
153                    });
154                } else if let Some(targets) = export_index.get(&edge.dst) {
155                    // Found matching names - create CALLS (or TESTS) edges with lower confidence
156                    for target_id in targets {
157                        let dst_is_test = test_node_ids.contains(target_id.as_str());
158                        let kind = if src_is_test && !dst_is_test {
159                            EdgeKind::Tests
160                        } else {
161                            EdgeKind::Calls
162                        };
163                        resolved_edges.push(EdgeDef {
164                            src: edge.src.clone(),
165                            dst: target_id.clone(),
166                            kind,
167                            confidence: 0.8,
168                            ..Default::default()
169                        });
170                    }
171                } else {
172                    // Keep the edge even if unresolved (maybe a future phase can handle)
173                    resolved_edges.push(edge.clone());
174                }
175            }
176            _ => {
177                // CoChanges, Owns, DependsOn — pass through unchanged
178                resolved_edges.push(edge.clone());
179            }
180        }
181    }
182
183    Ok(resolved_edges)
184}
185
186/// Create `File` [`NodeDef`]s for every path in `file_paths`.
187///
188/// These synthetic nodes are added to the graph so that `IMPORTS` edges
189/// always have a valid destination, even for files that contain no parseable symbols.
190pub fn create_file_nodes(
191    file_paths: &HashSet<String>,
192    language: &HashMap<String, &str>,
193) -> Vec<NodeDef> {
194    let mut nodes = Vec::new();
195
196    for path in file_paths {
197        let id = format!("file:{}", path);
198        let _lang = language.get(path.as_str()).copied().unwrap_or("unknown");
199
200        nodes.push(NodeDef {
201            id,
202            kind: NodeKind::File,
203            name: path.clone(),
204            path: path.clone(),
205            line_start: 1,
206            line_end: 1,
207            ..Default::default()
208        });
209    }
210
211    nodes
212}
213
214/// Build a `file_path → language` lookup from a slice of parsed nodes.
215///
216/// Function and class nodes take priority over file nodes so the inferred
217/// language is as accurate as possible.
218pub fn build_language_map(nodes: &[NodeDef]) -> HashMap<String, &'static str> {
219    let mut map = HashMap::new();
220    for node in nodes {
221        let lang = match node.id.split(':').next().unwrap_or("") {
222            "fn" if node.path.ends_with(".ts") || node.path.ends_with(".tsx") => "typescript",
223            "fn" if node.path.ends_with(".js") || node.path.ends_with(".jsx") => "javascript",
224            "fn" if node.path.ends_with(".py") => "python",
225            "fn" if node.path.ends_with(".rs") => "rust",
226            "cls" if node.path.ends_with(".ts") || node.path.ends_with(".tsx") => "typescript",
227            "cls" if node.path.ends_with(".js") || node.path.ends_with(".jsx") => "javascript",
228            "cls" if node.path.ends_with(".py") => "python",
229            "cls" if node.path.ends_with(".rs") => "rust",
230            "file" if node.path.ends_with(".ts") || node.path.ends_with(".tsx") => "typescript",
231            "file" if node.path.ends_with(".js") || node.path.ends_with(".jsx") => "javascript",
232            "file" if node.path.ends_with(".py") => "python",
233            "file" if node.path.ends_with(".rs") => "rust",
234            _ => "unknown",
235        };
236        // Only insert if not already present (function/class nodes take priority)
237        map.entry(node.path.clone()).or_insert(lang);
238    }
239    map
240}