Skip to main content

cgx_engine/parsers/
ts.rs

1use tree_sitter::{Parser, Query, QueryCursor, Node};
2
3use crate::parser::{EdgeDef, EdgeKind, LanguageParser, NodeDef, NodeKind, ParseResult};
4use crate::walker::SourceFile;
5
6pub struct TypeScriptParser {
7    language: tree_sitter::Language,
8}
9
10impl TypeScriptParser {
11    pub fn new() -> Self {
12        Self {
13            language: tree_sitter_typescript::language_typescript(),
14        }
15    }
16}
17
18impl Default for TypeScriptParser {
19    fn default() -> Self {
20        Self::new()
21    }
22}
23
24impl LanguageParser for TypeScriptParser {
25    fn extensions(&self) -> &[&str] {
26        &["ts", "tsx", "js", "jsx", "mjs", "cjs"]
27    }
28
29    fn extract(&self, file: &SourceFile) -> anyhow::Result<ParseResult> {
30        let mut parser = Parser::new();
31        parser.set_language(&self.language)?;
32
33        let tree = parser.parse(&file.content, None).ok_or_else(|| {
34            anyhow::anyhow!("failed to parse {}", file.relative_path)
35        })?;
36
37        let source_bytes = file.content.as_bytes();
38        let root = tree.root_node();
39        let mut nodes = Vec::new();
40        let mut edges = Vec::new();
41
42        let fp = file_node_id(&file.relative_path);
43
44        // Parse function declarations
45        if let Ok(query) =
46            Query::new(&self.language, "(function_declaration name: (identifier) @name) @fn")
47        {
48            extract_nodes(
49                &mut nodes,
50                &mut edges,
51                file,
52                &query,
53                root,
54                source_bytes,
55                NodeKind::Function,
56                "fn",
57                &fp,
58            );
59        }
60
61        // Parse arrow functions / variable declarations with arrow
62        if let Ok(query) = Query::new(
63            &self.language,
64            "(variable_declarator name: (identifier) @name value: (arrow_function) @fn)",
65        ) {
66            extract_nodes(
67                &mut nodes,
68                &mut edges,
69                file,
70                &query,
71                root,
72                source_bytes,
73                NodeKind::Function,
74                "fn",
75                &fp,
76            );
77        }
78
79        // Parse variable declarations with function expressions
80        if let Ok(query) = Query::new(
81            &self.language,
82            "(variable_declarator name: (identifier) @name value: (function_expression) @fn)",
83        ) {
84            extract_nodes(
85                &mut nodes,
86                &mut edges,
87                file,
88                &query,
89                root,
90                source_bytes,
91                NodeKind::Function,
92                "fn",
93                &fp,
94            );
95        }
96
97        // Parse class declarations
98        if let Ok(query) =
99            Query::new(&self.language, "(class_declaration name: (type_identifier) @name) @cls")
100        {
101            extract_nodes(
102                &mut nodes,
103                &mut edges,
104                file,
105                &query,
106                root,
107                source_bytes,
108                NodeKind::Class,
109                "cls",
110                &fp,
111            );
112        }
113
114        // Parse method definitions
115        if let Ok(query) = Query::new(
116            &self.language,
117            "(method_definition name: (property_identifier) @name) @m",
118        ) {
119            extract_nodes(
120                &mut nodes,
121                &mut edges,
122                file,
123                &query,
124                root,
125                source_bytes,
126                NodeKind::Function,
127                "fn",
128                &fp,
129            );
130        }
131
132        // Parse imports — walk the tree directly to find import statements
133        extract_imports(&mut edges, root, source_bytes, &fp, file);
134
135        // Parse exports
136        if let Ok(query) = Query::new(
137            &self.language,
138            "(export_statement (function_declaration name: (identifier) @name) @expr)",
139        ) {
140            process_exports(&mut nodes, &mut edges, file, &query, root, source_bytes, &fp, "fn");
141        }
142
143        if let Ok(query) = Query::new(
144            &self.language,
145            "(export_statement (class_declaration name: (type_identifier) @name) @expr)",
146        ) {
147            process_exports(&mut nodes, &mut edges, file, &query, root, source_bytes, &fp, "cls");
148        }
149
150        // Extract CALLS edges by walking the AST with function context tracking
151        extract_calls(&mut edges, root, source_bytes, file);
152
153        Ok(ParseResult { nodes, edges })
154    }
155}
156
/// Builds the graph id of a file node: the relative path prefixed with `file:`.
fn file_node_id(rel_path: &str) -> String {
    let mut id = String::with_capacity("file:".len() + rel_path.len());
    id.push_str("file:");
    id.push_str(rel_path);
    id
}
160
161#[allow(clippy::too_many_arguments)]
162fn extract_nodes(
163    nodes: &mut Vec<NodeDef>,
164    edges: &mut Vec<EdgeDef>,
165    file: &SourceFile,
166    query: &Query,
167    root: tree_sitter::Node,
168    source_bytes: &[u8],
169    kind: NodeKind,
170    prefix: &str,
171    file_id: &str,
172) {
173    let mut cursor = QueryCursor::new();
174    for m in cursor.matches(query, root, source_bytes) {
175        let Some(name_capture) = m
176            .captures
177            .iter()
178            .find(|c| query.capture_names()[c.index as usize] == "name")
179        else {
180            continue;
181        };
182
183        let name = unquote_str(&source_bytes[name_capture.node.byte_range()]);
184        let node_start = name_capture.node.start_position();
185
186        // Use the body node's end position so the snippet covers the full function/class body
187        let body_end = m
188            .captures
189            .iter()
190            .find(|c| {
191                let cap_name = &query.capture_names()[c.index as usize];
192                *cap_name == "fn" || *cap_name == "cls" || *cap_name == "m"
193            })
194            .map(|c| c.node.end_position())
195            .unwrap_or_else(|| name_capture.node.end_position());
196
197        let Some(_fn_capture) = m
198            .captures
199            .iter()
200            .find(|c| {
201                let cap_name = &query.capture_names()[c.index as usize];
202                *cap_name == "fn" || *cap_name == "cls" || *cap_name == "m"
203            })
204        else {
205            continue;
206        };
207
208        let id = format!("{}:{}:{}", prefix, file.relative_path, name);
209
210        nodes.push(NodeDef {
211            id,
212            kind: kind.clone(),
213            name,
214            path: file.relative_path.clone(),
215            line_start: node_start.row as u32 + 1,
216            line_end: body_end.row as u32 + 1,
217            ..Default::default()
218        });
219
220        edges.push(EdgeDef {
221            src: file_id.to_string(),
222            dst: format!("{}:{}:{}", prefix, file.relative_path, unquote_str(
223                &source_bytes[name_capture.node.byte_range()]
224            )),
225            kind: EdgeKind::Exports,
226            ..Default::default()
227        });
228    }
229}
230
231#[allow(clippy::too_many_arguments)]
232fn process_exports(
233    _nodes: &mut Vec<NodeDef>,
234    edges: &mut Vec<EdgeDef>,
235    file: &SourceFile,
236    query: &Query,
237    root: tree_sitter::Node,
238    source_bytes: &[u8],
239    file_id: &str,
240    prefix: &str,
241) {
242    let mut cursor = QueryCursor::new();
243    for m in cursor.matches(query, root, source_bytes) {
244        let Some(name_capture) = m
245            .captures
246            .iter()
247            .find(|c| query.capture_names()[c.index as usize] == "name")
248        else {
249            continue;
250        };
251
252        let name = node_text(name_capture.node, source_bytes);
253
254        edges.push(EdgeDef {
255            src: file_id.to_string(),
256            dst: format!("{}:{}:{}", prefix, file.relative_path, name),
257            kind: EdgeKind::Exports,
258            ..Default::default()
259        });
260    }
261}
262
263fn node_text(node: tree_sitter::Node, source: &[u8]) -> String {
264    node.utf8_text(source).unwrap_or("").to_string()
265}
266
267fn extract_imports(
268    edges: &mut Vec<EdgeDef>,
269    root: tree_sitter::Node,
270    source_bytes: &[u8],
271    file_id: &str,
272    file: &SourceFile,
273) {
274    // Walk the entire tree (not just root children) to find imports and requires
275    let mut cursor = root.walk();
276    traverse_imports(edges, root, source_bytes, file_id, file, &mut cursor);
277}
278
279fn traverse_imports(
280    edges: &mut Vec<EdgeDef>,
281    node: tree_sitter::Node,
282    source_bytes: &[u8],
283    file_id: &str,
284    file: &SourceFile,
285    cursor: &mut tree_sitter::TreeCursor,
286) {
287    if node.kind() == "import_statement" {
288        for j in 0..node.child_count() {
289            let Some(import_child) = node.child(j) else { continue };
290            if import_child.kind() == "string" {
291                let import_path = unquote_str(&source_bytes[import_child.byte_range()]);
292                if import_path.starts_with('.') {
293                    let resolved = resolve_import_path(&file.relative_path, &import_path);
294                    if !resolved.is_empty() {
295                        edges.push(EdgeDef {
296                            src: file_id.to_string(),
297                            dst: file_node_id(&resolved),
298                            kind: EdgeKind::Imports,
299                            ..Default::default()
300                        });
301                    }
302                }
303                break;
304            }
305        }
306    } else if node.kind() == "call_expression" {
307        // Check for require('...')
308        if let Some(func) = node.child_by_field_name("function") {
309            if func.kind() == "identifier" && node_text(func, source_bytes) == "require" {
310                if let Some(args) = node.child_by_field_name("arguments") {
311                    for k in 0..args.child_count() {
312                        let Some(arg) = args.child(k) else { continue };
313                        if arg.kind() == "string" {
314                            let import_path = unquote_str(&source_bytes[arg.byte_range()]);
315                            if import_path.starts_with('.') {
316                                let resolved = resolve_import_path(&file.relative_path, &import_path);
317                                if !resolved.is_empty() {
318                                    edges.push(EdgeDef {
319                                        src: file_id.to_string(),
320                                        dst: file_node_id(&resolved),
321                                        kind: EdgeKind::Imports,
322                                        ..Default::default()
323                                    });
324                                }
325                            }
326                            break;
327                        }
328                    }
329                }
330            }
331        }
332    }
333
334    if cursor.goto_first_child() {
335        loop {
336            let child = cursor.node();
337            traverse_imports(edges, child, source_bytes, file_id, file, cursor);
338            if !cursor.goto_next_sibling() {
339                break;
340            }
341        }
342        cursor.goto_parent();
343    }
344}
345
/// Decodes `s` as UTF-8, trims surrounding whitespace and removes the
/// enclosing quotes of a string literal.
///
/// When the trimmed text starts and ends with the same quote character
/// (`'` or `"`), exactly that one pair is removed, so quote characters that
/// belong to the content are preserved (`'say "hi"'` yields `say "hi"`).
/// Text without a matching pair falls back to trimming any leading/trailing
/// single quotes and then double quotes. Bytes that are not valid UTF-8
/// yield an empty string.
fn unquote_str(s: &[u8]) -> String {
    let text = std::str::from_utf8(s).unwrap_or("").trim();

    let mut chars = text.chars();
    match (chars.next(), chars.next_back()) {
        (Some(open), Some(close)) if open == close && (open == '\'' || open == '"') => {
            // `chars` now spans only what lies between the two quotes.
            chars.as_str().to_string()
        }
        _ => text.trim_matches('\'').trim_matches('"').to_string(),
    }
}
350
/// Resolves the relative specifier `import` against the directory of `current`.
///
/// `current` is a `/`-separated file path; its last component (the file
/// name) is dropped and the segments of `import` are applied in order:
/// `.` keeps the directory, `..` removes the last component (and is a no-op
/// once nothing is left), any other segment is appended. Empty segments,
/// as produced by a trailing or doubled slash, are ignored so the result
/// never contains `//` or ends in `/` because of the specifier.
///
/// No file extension is added and the file system is not consulted.
fn resolve_import_path(current: &str, import: &str) -> String {
    let mut parts: Vec<&str> = current.split('/').collect();
    parts.pop(); // drop the file name, keeping only its directory

    for segment in import.split('/') {
        match segment {
            "" | "." => {}
            ".." => {
                parts.pop();
            }
            other => parts.push(other),
        }
    }

    parts.join("/")
}
367
368/// Walk the AST tracking function context, emitting CALLS edges for each call_expression.
369fn extract_calls(edges: &mut Vec<EdgeDef>, root: Node, source: &[u8], file: &SourceFile) {
370    let mut fn_stack: Vec<String> = Vec::new();
371    walk_for_calls(edges, root, source, file, &mut fn_stack);
372}
373
/// Reports whether a tree-sitter node kind introduces a function scope.
///
/// Both `function_expression` and the older kind name `function` are
/// accepted, so anonymous function expressions are recognised whichever
/// name the grammar in use gives them.
fn is_fn_node(kind: &str) -> bool {
    matches!(
        kind,
        "function_declaration"
            | "function"
            | "function_expression"
            | "arrow_function"
            | "method_definition"
            | "generator_function_declaration"
            | "generator_function"
    )
}
380
381fn fn_name_from_node<'a>(node: Node<'a>, source: &[u8], file: &SourceFile) -> Option<String> {
382    // function_declaration / generator_function_declaration: has `name` field
383    if let Some(name_node) = node.child_by_field_name("name") {
384        let name = name_node.utf8_text(source).unwrap_or("").to_string();
385        if !name.is_empty() {
386            return Some(format!("fn:{}:{}", file.relative_path, name));
387        }
388    }
389    // Arrow/anonymous assigned to variable: look at parent variable_declarator
390    let parent = node.parent()?;
391    if parent.kind() == "variable_declarator" {
392        if let Some(name_node) = parent.child_by_field_name("name") {
393            let name = name_node.utf8_text(source).unwrap_or("").to_string();
394            if !name.is_empty() {
395                return Some(format!("fn:{}:{}", file.relative_path, name));
396            }
397        }
398    }
399    None
400}
401
402fn walk_for_calls(
403    edges: &mut Vec<EdgeDef>,
404    node: Node,
405    source: &[u8],
406    file: &SourceFile,
407    fn_stack: &mut Vec<String>,
408) {
409    let kind = node.kind();
410    let pushed = is_fn_node(kind);
411
412    if pushed {
413        if let Some(id) = fn_name_from_node(node, source, file) {
414            fn_stack.push(id);
415        } else {
416            // anonymous — push a sentinel so pop is balanced
417            fn_stack.push(String::new());
418        }
419    }
420
421    if kind == "call_expression" {
422        if let Some(caller_id) = fn_stack.last().filter(|s| !s.is_empty()) {
423            let callee_name = node
424                .child_by_field_name("function")
425                .and_then(|func| match func.kind() {
426                    "identifier" => Some(func.utf8_text(source).unwrap_or("").to_string()),
427                    "member_expression" => func
428                        .child_by_field_name("property")
429                        .map(|p| p.utf8_text(source).unwrap_or("").to_string()),
430                    _ => None,
431                })
432                .unwrap_or_default();
433
434            if !callee_name.is_empty() && callee_name != "require" {
435                edges.push(EdgeDef {
436                    src: caller_id.clone(),
437                    dst: callee_name,
438                    kind: EdgeKind::Calls,
439                    confidence: 0.7,
440                    ..Default::default()
441                });
442            }
443        }
444    }
445
446    let mut cursor = node.walk();
447    if cursor.goto_first_child() {
448        loop {
449            walk_for_calls(edges, cursor.node(), source, file, fn_stack);
450            if !cursor.goto_next_sibling() {
451                break;
452            }
453        }
454    }
455
456    if pushed {
457        fn_stack.pop();
458    }
459}