// cgx_engine/parsers/ts.rs

1use tree_sitter::{Node, Parser, Query, QueryCursor};
2
3use crate::parser::{EdgeDef, EdgeKind, LanguageParser, NodeDef, NodeKind, ParseResult};
4use crate::walker::SourceFile;
5
/// Stateless parser for TypeScript / JavaScript sources (see the
/// `LanguageParser` implementation for the supported extensions).
pub struct TypeScriptParser;

impl TypeScriptParser {
    /// Creates a parser. The type carries no state, so this is free.
    pub fn new() -> Self {
        TypeScriptParser
    }
}

impl Default for TypeScriptParser {
    /// Equivalent to [`TypeScriptParser::new`].
    fn default() -> Self {
        TypeScriptParser::new()
    }
}
19
/// Returns `true` when `path` ends in `.tsx` or `.jsx` (case-sensitive).
fn is_jsx_extension(path: &str) -> bool {
    // The text after the last '.' is the extension; no '.' means no match.
    matches!(path.rsplit_once('.'), Some((_, "tsx" | "jsx")))
}
23
24impl LanguageParser for TypeScriptParser {
25    fn extensions(&self) -> &[&str] {
26        &["ts", "tsx", "js", "jsx", "mjs", "cjs"]
27    }
28
29    fn extract(&self, file: &SourceFile) -> anyhow::Result<ParseResult> {
30        // TSX/JSX files must use the TSX grammar; TypeScript grammar rejects JSX syntax
31        // and produces error nodes with wrong line positions for every JSX element.
32        let language = if is_jsx_extension(&file.relative_path) {
33            tree_sitter_typescript::language_tsx()
34        } else {
35            tree_sitter_typescript::language_typescript()
36        };
37
38        let mut parser = Parser::new();
39        parser.set_language(&language)?;
40
41        let tree = parser
42            .parse(&file.content, None)
43            .ok_or_else(|| anyhow::anyhow!("failed to parse {}", file.relative_path))?;
44
45        let source_bytes = file.content.as_bytes();
46        let root = tree.root_node();
47        let mut nodes = Vec::new();
48        let mut edges = Vec::new();
49
50        let fp = file_node_id(&file.relative_path);
51
52        // Parse function declarations
53        if let Ok(query) = Query::new(
54            &language,
55            "(function_declaration name: (identifier) @name) @fn",
56        ) {
57            extract_nodes(
58                &mut nodes,
59                &mut edges,
60                file,
61                &query,
62                root,
63                source_bytes,
64                NodeKind::Function,
65                "fn",
66                &fp,
67            );
68        }
69
70        // Parse arrow functions / variable declarations with arrow
71        if let Ok(query) = Query::new(
72            &language,
73            "(variable_declarator name: (identifier) @name value: (arrow_function) @fn)",
74        ) {
75            extract_nodes(
76                &mut nodes,
77                &mut edges,
78                file,
79                &query,
80                root,
81                source_bytes,
82                NodeKind::Function,
83                "fn",
84                &fp,
85            );
86        }
87
88        // Parse variable declarations with function expressions
89        if let Ok(query) = Query::new(
90            &language,
91            "(variable_declarator name: (identifier) @name value: (function_expression) @fn)",
92        ) {
93            extract_nodes(
94                &mut nodes,
95                &mut edges,
96                file,
97                &query,
98                root,
99                source_bytes,
100                NodeKind::Function,
101                "fn",
102                &fp,
103            );
104        }
105
106        // Parse class declarations
107        if let Ok(query) = Query::new(
108            &language,
109            "(class_declaration name: (type_identifier) @name) @cls",
110        ) {
111            extract_nodes(
112                &mut nodes,
113                &mut edges,
114                file,
115                &query,
116                root,
117                source_bytes,
118                NodeKind::Class,
119                "cls",
120                &fp,
121            );
122        }
123
124        // Parse method definitions
125        if let Ok(query) = Query::new(
126            &language,
127            "(method_definition name: (property_identifier) @name) @m",
128        ) {
129            extract_nodes(
130                &mut nodes,
131                &mut edges,
132                file,
133                &query,
134                root,
135                source_bytes,
136                NodeKind::Function,
137                "fn",
138                &fp,
139            );
140        }
141
142        // Parse imports — walk the tree directly to find import statements
143        extract_imports(&mut edges, root, source_bytes, &fp, file);
144
145        // Parse exports
146        if let Ok(query) = Query::new(
147            &language,
148            "(export_statement (function_declaration name: (identifier) @name) @expr)",
149        ) {
150            process_exports(
151                &mut nodes,
152                &mut edges,
153                file,
154                &query,
155                root,
156                source_bytes,
157                &fp,
158                "fn",
159            );
160        }
161
162        if let Ok(query) = Query::new(
163            &language,
164            "(export_statement (class_declaration name: (type_identifier) @name) @expr)",
165        ) {
166            process_exports(
167                &mut nodes,
168                &mut edges,
169                file,
170                &query,
171                root,
172                source_bytes,
173                &fp,
174                "cls",
175            );
176        }
177
178        // Extract CALLS edges by walking the AST with function context tracking
179        extract_calls(&mut edges, root, source_bytes, file);
180
181        Ok(ParseResult { nodes, edges })
182    }
183}
184
/// Builds the graph id of a file node: the literal `file:` followed by `rel_path`.
fn file_node_id(rel_path: &str) -> String {
    const PREFIX: &str = "file:";
    let mut id = String::with_capacity(PREFIX.len() + rel_path.len());
    id.push_str(PREFIX);
    id.push_str(rel_path);
    id
}
188
189#[allow(clippy::too_many_arguments)]
190fn extract_nodes(
191    nodes: &mut Vec<NodeDef>,
192    edges: &mut Vec<EdgeDef>,
193    file: &SourceFile,
194    query: &Query,
195    root: tree_sitter::Node,
196    source_bytes: &[u8],
197    kind: NodeKind,
198    prefix: &str,
199    file_id: &str,
200) {
201    let mut cursor = QueryCursor::new();
202    for m in cursor.matches(query, root, source_bytes) {
203        let Some(name_capture) = m
204            .captures
205            .iter()
206            .find(|c| query.capture_names()[c.index as usize] == "name")
207        else {
208            continue;
209        };
210
211        let name = unquote_str(&source_bytes[name_capture.node.byte_range()]);
212        let node_start = name_capture.node.start_position();
213
214        // Use the body node's end position so the snippet covers the full function/class body
215        let body_end = m
216            .captures
217            .iter()
218            .find(|c| {
219                let cap_name = &query.capture_names()[c.index as usize];
220                *cap_name == "fn" || *cap_name == "cls" || *cap_name == "m"
221            })
222            .map(|c| c.node.end_position())
223            .unwrap_or_else(|| name_capture.node.end_position());
224
225        let Some(_fn_capture) = m.captures.iter().find(|c| {
226            let cap_name = &query.capture_names()[c.index as usize];
227            *cap_name == "fn" || *cap_name == "cls" || *cap_name == "m"
228        }) else {
229            continue;
230        };
231
232        let id = format!("{}:{}:{}", prefix, file.relative_path, name);
233
234        nodes.push(NodeDef {
235            id,
236            kind: kind.clone(),
237            name,
238            path: file.relative_path.clone(),
239            line_start: node_start.row as u32 + 1,
240            line_end: body_end.row as u32 + 1,
241            ..Default::default()
242        });
243
244        edges.push(EdgeDef {
245            src: file_id.to_string(),
246            dst: format!(
247                "{}:{}:{}",
248                prefix,
249                file.relative_path,
250                unquote_str(&source_bytes[name_capture.node.byte_range()])
251            ),
252            kind: EdgeKind::Exports,
253            ..Default::default()
254        });
255    }
256}
257
258#[allow(clippy::too_many_arguments)]
259fn process_exports(
260    _nodes: &mut Vec<NodeDef>,
261    edges: &mut Vec<EdgeDef>,
262    file: &SourceFile,
263    query: &Query,
264    root: tree_sitter::Node,
265    source_bytes: &[u8],
266    file_id: &str,
267    prefix: &str,
268) {
269    let mut cursor = QueryCursor::new();
270    for m in cursor.matches(query, root, source_bytes) {
271        let Some(name_capture) = m
272            .captures
273            .iter()
274            .find(|c| query.capture_names()[c.index as usize] == "name")
275        else {
276            continue;
277        };
278
279        let name = node_text(name_capture.node, source_bytes);
280
281        edges.push(EdgeDef {
282            src: file_id.to_string(),
283            dst: format!("{}:{}:{}", prefix, file.relative_path, name),
284            kind: EdgeKind::Exports,
285            ..Default::default()
286        });
287    }
288}
289
290fn node_text(node: tree_sitter::Node, source: &[u8]) -> String {
291    node.utf8_text(source).unwrap_or("").to_string()
292}
293
294fn extract_imports(
295    edges: &mut Vec<EdgeDef>,
296    root: tree_sitter::Node,
297    source_bytes: &[u8],
298    file_id: &str,
299    file: &SourceFile,
300) {
301    // Walk the entire tree (not just root children) to find imports and requires
302    let mut cursor = root.walk();
303    traverse_imports(edges, root, source_bytes, file_id, file, &mut cursor);
304}
305
306fn traverse_imports(
307    edges: &mut Vec<EdgeDef>,
308    node: tree_sitter::Node,
309    source_bytes: &[u8],
310    file_id: &str,
311    file: &SourceFile,
312    cursor: &mut tree_sitter::TreeCursor,
313) {
314    if node.kind() == "import_statement" {
315        for j in 0..node.child_count() {
316            let Some(import_child) = node.child(j) else {
317                continue;
318            };
319            if import_child.kind() == "string" {
320                let import_path = unquote_str(&source_bytes[import_child.byte_range()]);
321                if import_path.starts_with('.') {
322                    let resolved = resolve_import_path(&file.relative_path, &import_path);
323                    if !resolved.is_empty() {
324                        edges.push(EdgeDef {
325                            src: file_id.to_string(),
326                            dst: file_node_id(&resolved),
327                            kind: EdgeKind::Imports,
328                            ..Default::default()
329                        });
330                    }
331                }
332                break;
333            }
334        }
335    } else if node.kind() == "call_expression" {
336        // Check for require('...')
337        if let Some(func) = node.child_by_field_name("function") {
338            if func.kind() == "identifier" && node_text(func, source_bytes) == "require" {
339                if let Some(args) = node.child_by_field_name("arguments") {
340                    for k in 0..args.child_count() {
341                        let Some(arg) = args.child(k) else { continue };
342                        if arg.kind() == "string" {
343                            let import_path = unquote_str(&source_bytes[arg.byte_range()]);
344                            if import_path.starts_with('.') {
345                                let resolved =
346                                    resolve_import_path(&file.relative_path, &import_path);
347                                if !resolved.is_empty() {
348                                    edges.push(EdgeDef {
349                                        src: file_id.to_string(),
350                                        dst: file_node_id(&resolved),
351                                        kind: EdgeKind::Imports,
352                                        ..Default::default()
353                                    });
354                                }
355                            }
356                            break;
357                        }
358                    }
359                }
360            }
361        }
362    }
363
364    if cursor.goto_first_child() {
365        loop {
366            let child = cursor.node();
367            traverse_imports(edges, child, source_bytes, file_id, file, cursor);
368            if !cursor.goto_next_sibling() {
369                break;
370            }
371        }
372        cursor.goto_parent();
373    }
374}
375
/// Decodes `s` as UTF-8 and strips surrounding whitespace, then any leading and
/// trailing single quotes, then any leading and trailing double quotes.
///
/// Bytes that are not valid UTF-8 yield an empty string.
fn unquote_str(s: &[u8]) -> String {
    let text = match std::str::from_utf8(s) {
        Ok(decoded) => decoded,
        Err(_) => "",
    };
    let without_single = text.trim().trim_matches('\'');
    let without_double = without_single.trim_matches('"');
    without_double.to_owned()
}
380
/// Resolves the relative module specifier `import` against the directory of
/// `current` (a `/`-separated path whose last component is the file name).
///
/// `.` segments are ignored and `..` removes the last directory component.
/// A `..` that would climb above the first component is a no-op. Empty
/// segments, as produced by a trailing or doubled slash (`./foo/`, `.//foo`),
/// are ignored too, so the result never contains `//` or a trailing `/`
/// contributed by `import`.
///
/// No file extension is appended; the result is purely lexical.
fn resolve_import_path(current: &str, import: &str) -> String {
    let mut parts: Vec<&str> = current.split('/').collect();
    parts.pop(); // remove filename

    for segment in import.split('/') {
        match segment {
            // An empty segment carries no path information.
            "" | "." => {}
            ".." => {
                parts.pop();
            }
            name => parts.push(name),
        }
    }

    parts.join("/")
}
397
398/// Walk the AST tracking function context, emitting CALLS edges for each call_expression.
399fn extract_calls(edges: &mut Vec<EdgeDef>, root: Node, source: &[u8], file: &SourceFile) {
400    let mut fn_stack: Vec<String> = Vec::new();
401    walk_for_calls(edges, root, source, file, &mut fn_stack);
402}
403
/// Returns `true` when `kind` is a tree-sitter node kind that opens a new
/// function scope for call attribution.
///
/// Both `function_expression` and the older spelling `function` are accepted,
/// so `const f = function () { … }` is tracked whichever name the grammar uses
/// for function expressions.
fn is_fn_node(kind: &str) -> bool {
    matches!(
        kind,
        "function_declaration"
            | "function_expression"
            | "function"
            | "arrow_function"
            | "method_definition"
            | "generator_function_declaration"
            | "generator_function"
    )
}
415
416fn fn_name_from_node<'a>(node: Node<'a>, source: &[u8], file: &SourceFile) -> Option<String> {
417    // function_declaration / generator_function_declaration: has `name` field
418    if let Some(name_node) = node.child_by_field_name("name") {
419        let name = name_node.utf8_text(source).unwrap_or("").to_string();
420        if !name.is_empty() {
421            return Some(format!("fn:{}:{}", file.relative_path, name));
422        }
423    }
424    // Arrow/anonymous assigned to variable: look at parent variable_declarator
425    let parent = node.parent()?;
426    if parent.kind() == "variable_declarator" {
427        if let Some(name_node) = parent.child_by_field_name("name") {
428            let name = name_node.utf8_text(source).unwrap_or("").to_string();
429            if !name.is_empty() {
430                return Some(format!("fn:{}:{}", file.relative_path, name));
431            }
432        }
433    }
434    None
435}
436
437fn walk_for_calls(
438    edges: &mut Vec<EdgeDef>,
439    node: Node,
440    source: &[u8],
441    file: &SourceFile,
442    fn_stack: &mut Vec<String>,
443) {
444    let kind = node.kind();
445    let pushed = is_fn_node(kind);
446
447    if pushed {
448        if let Some(id) = fn_name_from_node(node, source, file) {
449            fn_stack.push(id);
450        } else {
451            // anonymous — push a sentinel so pop is balanced
452            fn_stack.push(String::new());
453        }
454    }
455
456    if kind == "call_expression" {
457        if let Some(caller_id) = fn_stack.last().filter(|s| !s.is_empty()) {
458            let callee_name = node
459                .child_by_field_name("function")
460                .and_then(|func| match func.kind() {
461                    "identifier" => Some(func.utf8_text(source).unwrap_or("").to_string()),
462                    "member_expression" => func
463                        .child_by_field_name("property")
464                        .map(|p| p.utf8_text(source).unwrap_or("").to_string()),
465                    _ => None,
466                })
467                .unwrap_or_default();
468
469            if !callee_name.is_empty() && callee_name != "require" {
470                edges.push(EdgeDef {
471                    src: caller_id.clone(),
472                    dst: callee_name,
473                    kind: EdgeKind::Calls,
474                    confidence: 0.7,
475                    ..Default::default()
476                });
477            }
478        }
479    }
480
481    // JSX component usage: <ComponentName ... /> and <ComponentName ...>
482    // Treat JSX elements as calls from the enclosing function to the component.
483    if kind == "jsx_opening_element" || kind == "jsx_self_closing_element" {
484        if let Some(caller_id) = fn_stack.last().filter(|s| !s.is_empty()) {
485            let tag_name = node
486                .child_by_field_name("name")
487                .map(|n| n.utf8_text(source).unwrap_or("").to_string())
488                .unwrap_or_default();
489
490            // Only emit edges for PascalCase or camelCase user-defined components.
491            // Lowercase tags like <div>, <span> are HTML intrinsics — skip them.
492            let is_component = tag_name
493                .chars()
494                .next()
495                .map(|c| {
496                    c.is_uppercase()
497                        || (c.is_lowercase() && tag_name.len() > 3 && tag_name.contains('.'))
498                })
499                .unwrap_or(false);
500
501            if is_component {
502                // Strip member access for <Namespace.Component /> — use only the last segment
503                let callee = tag_name
504                    .split('.')
505                    .next_back()
506                    .unwrap_or(&tag_name)
507                    .to_string();
508                edges.push(EdgeDef {
509                    src: caller_id.clone(),
510                    dst: callee,
511                    kind: EdgeKind::Calls,
512                    confidence: 0.6,
513                    ..Default::default()
514                });
515            }
516        }
517    }
518
519    let mut cursor = node.walk();
520    if cursor.goto_first_child() {
521        loop {
522            walk_for_calls(edges, cursor.node(), source, file, fn_stack);
523            if !cursor.goto_next_sibling() {
524                break;
525            }
526        }
527    }
528
529    if pushed {
530        fn_stack.pop();
531    }
532}